diff --git a/manifests/crd.yaml b/manifests/crd.yaml index 24e9998728..a1717e5497 100644 --- a/manifests/crd.yaml +++ b/manifests/crd.yaml @@ -7060,6 +7060,10 @@ spec: tidb: description: TidbAutoScalerSpec describes the spec for tidb auto-scaling properties: + MetricsTimeDuration: + description: MetricsTimeDuration describe the Time duration to be + queried in the Prometheus + type: string maxReplicas: description: maxReplicas is the upper limit for the number of replicas to which the autoscaler can scale out. It cannot be less than @@ -7090,18 +7094,36 @@ spec: will be set to 500 format: int32 type: integer + scaleInThreshold: + description: ScaleInThreshold describe the consecutive threshold + for the auto-scaling, if the consecutive counts of the scale-in + result in auto-scaling reach this number, the auto-scaling would + be performed. If not set, the default value is 5. + format: int32 + type: integer scaleOutIntervalSeconds: description: ScaleOutIntervalSeconds represents the duration seconds between each auto-scaling-out If not set, the default ScaleOutIntervalSeconds will be set to 300 format: int32 type: integer + scaleOutThreshold: + description: ScaleOutThreshold describe the consecutive threshold + for the auto-scaling, if the consecutive counts of the scale-out + result in auto-scaling reach this number, the auto-scaling would + be performed. If not set, the default value is 3. + format: int32 + type: integer required: - maxReplicas type: object tikv: description: TikvAutoScalerSpec describes the spec for tikv auto-scaling properties: + MetricsTimeDuration: + description: MetricsTimeDuration describe the Time duration to be + queried in the Prometheus + type: string maxReplicas: description: maxReplicas is the upper limit for the number of replicas to which the autoscaler can scale out. It cannot be less than @@ -7132,12 +7154,26 @@ spec: will be set to 500 format: int32 type: integer + scaleInThreshold: + description: ScaleInThreshold describe the consecutive threshold + for the auto-scaling, if the consecutive counts of the scale-in + result in auto-scaling reach this number, the auto-scaling would + be performed. If not set, the default value is 5. + format: int32 + type: integer scaleOutIntervalSeconds: description: ScaleOutIntervalSeconds represents the duration seconds between each auto-scaling-out If not set, the default ScaleOutIntervalSeconds will be set to 300 format: int32 type: integer + scaleOutThreshold: + description: ScaleOutThreshold describe the consecutive threshold + for the auto-scaling, if the consecutive counts of the scale-out + result in auto-scaling reach this number, the auto-scaling would + be performed. If not set, the default value is 3. + format: int32 + type: integer required: - maxReplicas type: object diff --git a/pkg/apis/pingcap/v1alpha1/openapi_generated.go b/pkg/apis/pingcap/v1alpha1/openapi_generated.go index 1808cc0880..42c2712634 100644 --- a/pkg/apis/pingcap/v1alpha1/openapi_generated.go +++ b/pkg/apis/pingcap/v1alpha1/openapi_generated.go @@ -808,6 +808,27 @@ func schema_pkg_apis_pingcap_v1alpha1_BasicAutoScalerSpec(ref common.ReferenceCa }, }, }, + "MetricsTimeDuration": { + SchemaProps: spec.SchemaProps{ + Description: "MetricsTimeDuration describe the Time duration to be queried in the Prometheus", + Type: []string{"string"}, + Format: "", + }, + }, + "scaleOutThreshold": { + SchemaProps: spec.SchemaProps{ + Description: "ScaleOutThreshold describe the consecutive threshold for the auto-scaling, if the consecutive counts of the scale-out result in auto-scaling reach this number, the auto-scaling would be performed. If not set, the default value is 3.", + Type: []string{"integer"}, + Format: "int32", + }, + }, + "scaleInThreshold": { + SchemaProps: spec.SchemaProps{ + Description: "ScaleInThreshold describe the consecutive threshold for the auto-scaling, if the consecutive counts of the scale-in result in auto-scaling reach this number, the auto-scaling would be performed. If not set, the default value is 5.", + Type: []string{"integer"}, + Format: "int32", + }, + }, }, Required: []string{"maxReplicas"}, }, @@ -5684,6 +5705,27 @@ func schema_pkg_apis_pingcap_v1alpha1_TidbAutoScalerSpec(ref common.ReferenceCal }, }, }, + "MetricsTimeDuration": { + SchemaProps: spec.SchemaProps{ + Description: "MetricsTimeDuration describe the Time duration to be queried in the Prometheus", + Type: []string{"string"}, + Format: "", + }, + }, + "scaleOutThreshold": { + SchemaProps: spec.SchemaProps{ + Description: "ScaleOutThreshold describe the consecutive threshold for the auto-scaling, if the consecutive counts of the scale-out result in auto-scaling reach this number, the auto-scaling would be performed. If not set, the default value is 3.", + Type: []string{"integer"}, + Format: "int32", + }, + }, + "scaleInThreshold": { + SchemaProps: spec.SchemaProps{ + Description: "ScaleInThreshold describe the consecutive threshold for the auto-scaling, if the consecutive counts of the scale-in result in auto-scaling reach this number, the auto-scaling would be performed. If not set, the default value is 5.", + Type: []string{"integer"}, + Format: "int32", + }, + }, }, Required: []string{"maxReplicas"}, }, @@ -6563,6 +6605,27 @@ func schema_pkg_apis_pingcap_v1alpha1_TikvAutoScalerSpec(ref common.ReferenceCal }, }, }, + "MetricsTimeDuration": { + SchemaProps: spec.SchemaProps{ + Description: "MetricsTimeDuration describe the Time duration to be queried in the Prometheus", + Type: []string{"string"}, + Format: "", + }, + }, + "scaleOutThreshold": { + SchemaProps: spec.SchemaProps{ + Description: "ScaleOutThreshold describe the consecutive threshold for the auto-scaling, if the consecutive counts of the scale-out result in auto-scaling reach this number, the auto-scaling would be performed. If not set, the default value is 3.", + Type: []string{"integer"}, + Format: "int32", + }, + }, + "scaleInThreshold": { + SchemaProps: spec.SchemaProps{ + Description: "ScaleInThreshold describe the consecutive threshold for the auto-scaling, if the consecutive counts of the scale-in result in auto-scaling reach this number, the auto-scaling would be performed. If not set, the default value is 5.", + Type: []string{"integer"}, + Format: "int32", + }, + }, }, Required: []string{"maxReplicas"}, }, diff --git a/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go b/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go index 749c48082e..af8ae40128 100644 --- a/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go +++ b/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go @@ -112,6 +112,24 @@ type BasicAutoScalerSpec struct { // If not set, the default metric will be set to 80% average CPU utilization. // +optional Metrics []v2beta2.MetricSpec `json:"metrics,omitempty"` + + // MetricsTimeDuration describe the Time duration to be queried in the Prometheus + // +optional + MetricsTimeDuration *string `json:"MetricsTimeDuration,omitempty"` + + // ScaleOutThreshold describe the consecutive threshold for the auto-scaling, + // if the consecutive counts of the scale-out result in auto-scaling reach this number, + // the auto-scaling would be performed. + // If not set, the default value is 3. + // +optional + ScaleOutThreshold *int32 `json:"scaleOutThreshold,omitempty"` + + // ScaleInThreshold describe the consecutive threshold for the auto-scaling, + // if the consecutive counts of the scale-in result in auto-scaling reach this number, + // the auto-scaling would be performed. + // If not set, the default value is 5. + // +optional + ScaleInThreshold *int32 `json:"scaleInThreshold,omitempty"` } // TODO: sync status diff --git a/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go index 13425dc401..b45b2e3116 100644 --- a/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go @@ -339,6 +339,21 @@ func (in *BasicAutoScalerSpec) DeepCopyInto(out *BasicAutoScalerSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.MetricsTimeDuration != nil { + in, out := &in.MetricsTimeDuration, &out.MetricsTimeDuration + *out = new(string) + **out = **in + } + if in.ScaleOutThreshold != nil { + in, out := &in.ScaleOutThreshold, &out.ScaleOutThreshold + *out = new(int32) + **out = **in + } + if in.ScaleInThreshold != nil { + in, out := &in.ScaleInThreshold, &out.ScaleInThreshold + *out = new(int32) + **out = **in + } return } diff --git a/pkg/autoscaler/autoscaler/autoscaler_manager.go b/pkg/autoscaler/autoscaler/autoscaler_manager.go index 79cca7491c..58528ea346 100644 --- a/pkg/autoscaler/autoscaler/autoscaler_manager.go +++ b/pkg/autoscaler/autoscaler/autoscaler_manager.go @@ -20,6 +20,7 @@ import ( informers "github.com/pingcap/tidb-operator/pkg/client/informers/externalversions" v1alpha1listers "github.com/pingcap/tidb-operator/pkg/client/listers/pingcap/v1alpha1" promClient "github.com/prometheus/client_golang/api" + "k8s.io/apimachinery/pkg/api/errors" kubeinformers "k8s.io/client-go/informers" appslisters "k8s.io/client-go/listers/apps/v1" "k8s.io/client-go/tools/record" @@ -57,8 +58,14 @@ func (am *autoScalerManager) Sync(tac *v1alpha1.TidbClusterAutoScaler) error { tcNamespace := tac.Spec.Cluster.Namespace tc, err := am.tcLister.TidbClusters(tcNamespace).Get(tcName) if err != nil { + if errors.IsNotFound(err) { + // Target TidbCluster Ref is deleted, empty the auto-scaling status + emptyAutoScalingCountAnn(tac, v1alpha1.TiDBMemberType) + emptyAutoScalingCountAnn(tac, v1alpha1.TiKVMemberType) + } return err } + checkAndUpdateTacAnn(tac) oldTCSpec := tc.Spec.DeepCopy() if err := am.syncAutoScaling(tc, tac); err != nil { return err @@ -94,7 +101,7 @@ func (am *autoScalerManager) syncTidbClusterReplicas(tc *v1alpha1.TidbCluster, o } //TODO: sync tac status -func (am *autoScalerManager) syncAutoScalingStatus(tc *v1alpha1.TidbCluster, oldTCSpec *v1alpha1.TidbClusterSpec, +func (am *autoScalerManager) syncAutoScalingStatus(tc *v1alpha1.TidbCluster, oldTc *v1alpha1.TidbClusterSpec, tac *v1alpha1.TidbClusterAutoScaler) error { return nil } diff --git a/pkg/autoscaler/autoscaler/tidb_autoscaler.go b/pkg/autoscaler/autoscaler/tidb_autoscaler.go index bd943e3dfa..7eb4aabdf9 100644 --- a/pkg/autoscaler/autoscaler/tidb_autoscaler.go +++ b/pkg/autoscaler/autoscaler/tidb_autoscaler.go @@ -24,6 +24,7 @@ import ( func (am *autoScalerManager) syncTiDB(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, client promClient.Client) error { if tac.Spec.TiDB == nil { + emptyAutoScalingCountAnn(tac, v1alpha1.TiDBMemberType) return nil } sts, err := am.stsLister.StatefulSets(tc.Namespace).Get(operatorUtils.GetStatefulSetName(tc, v1alpha1.TiDBMemberType)) @@ -31,31 +32,51 @@ func (am *autoScalerManager) syncTiDB(tc *v1alpha1.TidbCluster, tac *v1alpha1.Ti return err } if !checkAutoScalingPrerequisites(tc, sts, v1alpha1.TiDBMemberType) { + emptyAutoScalingCountAnn(tac, v1alpha1.TiDBMemberType) return nil } - targetReplicas := tc.Spec.TiDB.Replicas - - // TODO: sync tidb.metrics from prometheus - // rate(process_cpu_seconds_total{cluster="tidb",job="tidb"}[threshold Minute]) - //for _, _ = range tac.Spec.TiDB.Metrics { - // // revive:disable:empty-block - //} + currentReplicas := tc.Spec.TiDB.Replicas + targetReplicas := calculateRecommendedReplicas(tac, v1alpha1.TiDBMemberType, client) targetReplicas = limitTargetReplicas(targetReplicas, tac, v1alpha1.TiDBMemberType) if targetReplicas == tc.Spec.TiDB.Replicas { + emptyAutoScalingCountAnn(tac, v1alpha1.TiDBMemberType) + return nil + } + return syncTiDBAfterCalculated(tc, tac, currentReplicas, targetReplicas) +} + +// syncTiDBAfterCalculated would check the Consecutive count to avoid jitter, and it would also check the interval +// duration between each auto-scaling. If either of them is not meet, the auto-scaling would be rejected. +// If the auto-scaling is permitted, the timestamp would be recorded and the Consecutive count would be zeroed. +func syncTiDBAfterCalculated(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32) error { + if err := updateConsecutiveCount(tac, v1alpha1.TiDBMemberType, currentReplicas, recommendedReplicas); err != nil { + return err + } + + ableToScale, err := checkConsecutiveCount(tac, v1alpha1.TiDBMemberType, currentReplicas, recommendedReplicas) + if err != nil { + return err + } + if !ableToScale { return nil } intervalSeconds := tac.Spec.TiDB.ScaleInIntervalSeconds - if targetReplicas > tc.Spec.TiDB.Replicas { + if recommendedReplicas > currentReplicas { intervalSeconds = tac.Spec.TiDB.ScaleOutIntervalSeconds } - ableToScale, err := checkStsAutoScalingInterval(tc, *intervalSeconds, v1alpha1.TiDBMemberType) + ableToScale, err = checkStsAutoScalingInterval(tc, *intervalSeconds, v1alpha1.TiDBMemberType) if err != nil { return err } if !ableToScale { return nil } - tc.Spec.Annotations[label.AnnTiDBLastAutoScalingTimestamp] = time.Now().String() - tc.Spec.TiDB.Replicas = targetReplicas + updateTcTiDBAnnIfScale(tac) + tc.Spec.TiDB.Replicas = recommendedReplicas return nil } + +func updateTcTiDBAnnIfScale(tac *v1alpha1.TidbClusterAutoScaler) { + tac.Annotations[label.AnnTiDBLastAutoScalingTimestamp] = time.Now().String() + emptyAutoScalingCountAnn(tac, v1alpha1.TiDBMemberType) +} diff --git a/pkg/autoscaler/autoscaler/tidb_autoscaler_test.go b/pkg/autoscaler/autoscaler/tidb_autoscaler_test.go new file mode 100644 index 0000000000..3f7e128725 --- /dev/null +++ b/pkg/autoscaler/autoscaler/tidb_autoscaler_test.go @@ -0,0 +1,114 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package autoscaler + +import ( + "fmt" + "testing" + + . "github.com/onsi/gomega" + "github.com/pingcap/tidb-operator/pkg/label" +) + +func TestSyncTiDBAfterCalculated(t *testing.T) { + g := NewGomegaWithT(t) + + type testcase struct { + name string + currentReplicas int32 + recommendedReplicas int32 + currentScaleInCount int32 + currentScaleOutCount int32 + expectedScaleOutAnnValue string + expectedScaleInAnnValue string + autoScalingPermitted bool + } + testFn := func(test *testcase) { + t.Log(test.name) + + tac := newTidbClusterAutoScaler() + tc := newTidbCluster() + tc.Spec.TiDB.Replicas = test.currentReplicas + tac.Annotations[label.AnnTiDBConsecutiveScaleInCount] = fmt.Sprintf("%d", test.currentScaleInCount) + tac.Annotations[label.AnnTiDBConsecutiveScaleOutCount] = fmt.Sprintf("%d", test.currentScaleOutCount) + + err := syncTiDBAfterCalculated(tc, tac, test.currentReplicas, test.recommendedReplicas) + g.Expect(err).ShouldNot(HaveOccurred()) + + _, existed := tac.Annotations[label.AnnTiDBLastAutoScalingTimestamp] + g.Expect(existed).Should(Equal(test.autoScalingPermitted)) + if test.autoScalingPermitted { + g.Expect(tc.Spec.TiDB.Replicas).Should(Equal(test.recommendedReplicas)) + } else { + g.Expect(tc.Spec.TiDB.Replicas).Should(Equal(test.currentReplicas)) + } + + } + tests := []testcase{ + { + name: "scale out, permitted", + currentReplicas: 2, + recommendedReplicas: 3, + currentScaleOutCount: 1, + currentScaleInCount: 0, + expectedScaleOutAnnValue: "0", + expectedScaleInAnnValue: "0", + autoScalingPermitted: true, + }, + { + name: "scale out, rejected", + currentReplicas: 2, + recommendedReplicas: 3, + currentScaleOutCount: 0, + currentScaleInCount: 1, + expectedScaleOutAnnValue: "1", + expectedScaleInAnnValue: "0", + autoScalingPermitted: false, + }, + { + name: "scale in, permitted", + currentReplicas: 3, + recommendedReplicas: 2, + currentScaleOutCount: 0, + currentScaleInCount: 1, + expectedScaleInAnnValue: "0", + expectedScaleOutAnnValue: "0", + autoScalingPermitted: true, + }, + { + name: "scale in, rejected", + currentReplicas: 3, + recommendedReplicas: 2, + currentScaleOutCount: 1, + currentScaleInCount: 0, + expectedScaleOutAnnValue: "0", + expectedScaleInAnnValue: "1", + autoScalingPermitted: false, + }, + { + name: "no scaling", + currentReplicas: 2, + recommendedReplicas: 2, + currentScaleOutCount: 1, + currentScaleInCount: 0, + expectedScaleInAnnValue: "0", + expectedScaleOutAnnValue: "0", + autoScalingPermitted: false, + }, + } + + for _, test := range tests { + testFn(&test) + } +} diff --git a/pkg/autoscaler/autoscaler/tikv_autoscaler.go b/pkg/autoscaler/autoscaler/tikv_autoscaler.go index 256b9a6a9c..7f7b817777 100644 --- a/pkg/autoscaler/autoscaler/tikv_autoscaler.go +++ b/pkg/autoscaler/autoscaler/tikv_autoscaler.go @@ -24,6 +24,7 @@ import ( func (am *autoScalerManager) syncTiKV(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, client promClient.Client) error { if tac.Spec.TiKV == nil { + emptyAutoScalingCountAnn(tac, v1alpha1.TiKVMemberType) return nil } sts, err := am.stsLister.StatefulSets(tc.Namespace).Get(operatorUtils.GetStatefulSetName(tc, v1alpha1.TiKVMemberType)) @@ -31,31 +32,64 @@ func (am *autoScalerManager) syncTiKV(tc *v1alpha1.TidbCluster, tac *v1alpha1.Ti return err } if !checkAutoScalingPrerequisites(tc, sts, v1alpha1.TiKVMemberType) { + emptyAutoScalingCountAnn(tac, v1alpha1.TiKVMemberType) return nil } - targetReplicas := tc.Spec.TiKV.Replicas - - // TODO: sync tikv .metrics from prometheus - // sum(rate(tikv_grpc_msg_duration_seconds_count{cluster="tidb", type!="kv_gc"}[1m])) by (instance) - //for _, _ = range tac.Spec.TiKV.Metrics { - // // revive:disable:empty-block - //} + currentReplicas := getStateUpReplicas(tc) + targetReplicas := calculateRecommendedReplicas(tac, v1alpha1.TiKVMemberType, client) targetReplicas = limitTargetReplicas(targetReplicas, tac, v1alpha1.TiKVMemberType) if targetReplicas == tc.Spec.TiKV.Replicas { + emptyAutoScalingCountAnn(tac, v1alpha1.TiKVMemberType) + return nil + } + return syncTiKVAfterCalculated(tc, tac, currentReplicas, targetReplicas) +} + +// syncTiKVAfterCalculated would check the Consecutive count to avoid jitter, and it would also check the interval +// duration between each auto-scaling. If either of them is not meet, the auto-scaling would be rejected. +// If the auto-scaling is permitted, the timestamp would be recorded and the Consecutive count would be zeroed. +// The currentReplicas of TiKV calculated in auto-scaling is the count of the StateUp TiKV instance, so we need to +// add the number of other state tikv instance replicas when we update the TidbCluster.Spec.TiKV.Replicas +func syncTiKVAfterCalculated(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32) error { + if err := updateConsecutiveCount(tac, v1alpha1.TiKVMemberType, currentReplicas, recommendedReplicas); err != nil { + return err + } + + ableToScale, err := checkConsecutiveCount(tac, v1alpha1.TiKVMemberType, currentReplicas, recommendedReplicas) + if err != nil { + return err + } + if !ableToScale { return nil } + intervalSeconds := tac.Spec.TiKV.ScaleInIntervalSeconds - if targetReplicas > tc.Spec.TiKV.Replicas { + if recommendedReplicas > tc.Spec.TiKV.Replicas { intervalSeconds = tac.Spec.TiKV.ScaleOutIntervalSeconds } - ableToScale, err := checkStsAutoScalingInterval(tc, *intervalSeconds, v1alpha1.TiKVMemberType) + ableToScale, err = checkStsAutoScalingInterval(tc, *intervalSeconds, v1alpha1.TiKVMemberType) if err != nil { return err } if !ableToScale { return nil } - tc.Spec.Annotations[label.AnnTiKVLastAutoScalingTimestamp] = time.Now().String() - tc.Spec.TiKV.Replicas = targetReplicas + updateTcTiKVAnnIfScale(tac) + tc.Spec.TiKV.Replicas = recommendedReplicas return nil } + +func getStateUpReplicas(tc *v1alpha1.TidbCluster) int32 { + count := 0 + for _, store := range tc.Status.TiKV.Stores { + if store.State == v1alpha1.TiKVStateUp { + count = count + 1 + } + } + return int32(count) +} + +func updateTcTiKVAnnIfScale(tac *v1alpha1.TidbClusterAutoScaler) { + tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] = time.Now().String() + emptyAutoScalingCountAnn(tac, v1alpha1.TiKVMemberType) +} diff --git a/pkg/autoscaler/autoscaler/tikv_autoscaler_test.go b/pkg/autoscaler/autoscaler/tikv_autoscaler_test.go new file mode 100644 index 0000000000..6a2647c357 --- /dev/null +++ b/pkg/autoscaler/autoscaler/tikv_autoscaler_test.go @@ -0,0 +1,113 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package autoscaler + +import ( + "fmt" + "testing" + + . "github.com/onsi/gomega" + "github.com/pingcap/tidb-operator/pkg/label" +) + +func TestSyncTiKVAfterCalculated(t *testing.T) { + g := NewGomegaWithT(t) + + type testcase struct { + name string + currentReplicas int32 + recommendedReplicas int32 + currentScaleInCount int32 + currentScaleOutCount int32 + expectedScaleOutAnnValue string + expectedScaleInAnnValue string + autoScalingPermitted bool + } + testFn := func(test *testcase) { + t.Log(test.name) + + tac := newTidbClusterAutoScaler() + tc := newTidbCluster() + tc.Spec.TiKV.Replicas = test.currentReplicas + tac.Annotations[label.AnnTiKVConsecutiveScaleInCount] = fmt.Sprintf("%d", test.currentScaleInCount) + tac.Annotations[label.AnnTiKVConsecutiveScaleOutCount] = fmt.Sprintf("%d", test.currentScaleOutCount) + + err := syncTiKVAfterCalculated(tc, tac, test.currentReplicas, test.recommendedReplicas) + g.Expect(err).ShouldNot(HaveOccurred()) + + _, existed := tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] + g.Expect(existed).Should(Equal(test.autoScalingPermitted)) + if test.autoScalingPermitted { + g.Expect(tc.Spec.TiKV.Replicas).Should(Equal(test.recommendedReplicas)) + } else { + g.Expect(tc.Spec.TiKV.Replicas).Should(Equal(test.currentReplicas)) + } + } + + tests := []*testcase{ + { + name: "tikv scale-out,permitted, no failure Instance", + currentReplicas: 3, + recommendedReplicas: 4, + currentScaleInCount: 0, + currentScaleOutCount: 1, + expectedScaleOutAnnValue: "0", + expectedScaleInAnnValue: "0", + autoScalingPermitted: true, + }, + { + name: "tikv scale-out, rejected, no failure instance", + currentReplicas: 3, + recommendedReplicas: 4, + currentScaleInCount: 1, + currentScaleOutCount: 0, + expectedScaleInAnnValue: "0", + expectedScaleOutAnnValue: "1", + autoScalingPermitted: false, + }, + { + name: "tikv scale-in, permitted, no failure instance", + currentReplicas: 4, + recommendedReplicas: 3, + currentScaleInCount: 1, + currentScaleOutCount: 0, + expectedScaleInAnnValue: "0", + expectedScaleOutAnnValue: "0", + autoScalingPermitted: true, + }, + { + name: "tikv scale-in, rejected, no failure instace", + currentReplicas: 4, + recommendedReplicas: 3, + currentScaleInCount: 0, + currentScaleOutCount: 0, + expectedScaleInAnnValue: "1", + expectedScaleOutAnnValue: "0", + autoScalingPermitted: false, + }, + { + name: "tikv no-scaling, no failure", + currentReplicas: 3, + recommendedReplicas: 3, + currentScaleInCount: 1, + currentScaleOutCount: 0, + expectedScaleInAnnValue: "0", + expectedScaleOutAnnValue: "0", + autoScalingPermitted: false, + }, + } + for _, test := range tests { + testFn(test) + } +} diff --git a/pkg/autoscaler/autoscaler/util.go b/pkg/autoscaler/autoscaler/util.go index 6ed74d8456..4632c6c8c4 100644 --- a/pkg/autoscaler/autoscaler/util.go +++ b/pkg/autoscaler/autoscaler/util.go @@ -14,18 +14,28 @@ package autoscaler import ( + "fmt" "strconv" "time" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" "github.com/pingcap/tidb-operator/pkg/label" operatorUtils "github.com/pingcap/tidb-operator/pkg/util" + promClient "github.com/prometheus/client_golang/api" appsv1 "k8s.io/api/apps/v1" autoscalingv2beta2 "k8s.io/api/autoscaling/v2beta2" corev1 "k8s.io/api/core/v1" "k8s.io/utils/pointer" ) +const ( + annScaleOutSuffix = "tidb.pingcap.com/consecutive-scale-out-count" + annScaleInSuffix = "tidb.pingcap.com/consecutive-scale-in-count" + + invalidMemberTypeErrorMsg = "tac[%s/%s] invalid set MemberType:%s" + invalidTacAnnotationErrorMsg = "tac[%s/%s] annotation invalid set,err:%v" +) + var defaultMetricSpec = autoscalingv2beta2.MetricSpec{ Type: autoscalingv2beta2.ResourceMetricSourceType, Resource: &autoscalingv2beta2.ResourceMetricSource{ @@ -49,13 +59,13 @@ func checkStsAutoScalingPrerequisites(set *appsv1.StatefulSet) bool { } // checkStsAutoScalingInterval would check whether there is enough interval duration between every two auto-scaling -func checkStsAutoScalingInterval(tc *v1alpha1.TidbCluster, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) { - if tc.Annotations == nil { - tc.Annotations = map[string]string{} +func checkStsAutoScalingInterval(tac *v1alpha1.TidbCluster, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) { + if tac.Annotations == nil { + tac.Annotations = map[string]string{} } - lastAutoScalingTimestamp, existed := tc.Annotations[label.AnnTiDBLastAutoScalingTimestamp] + lastAutoScalingTimestamp, existed := tac.Annotations[label.AnnTiDBLastAutoScalingTimestamp] if memberType == v1alpha1.TiKVMemberType { - lastAutoScalingTimestamp, existed = tc.Annotations[label.AnnTiKVLastAutoScalingTimestamp] + lastAutoScalingTimestamp, existed = tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] } if !existed { return true, nil @@ -126,7 +136,23 @@ func defaultTAC(tac *v1alpha1.TidbClusterAutoScaler) { if len(tac.Spec.TiKV.Metrics) == 0 { tac.Spec.TiKV.Metrics = append(tac.Spec.TiKV.Metrics, defaultMetricSpec) } + if tac.Spec.TiKV.ScaleInThreshold == nil { + tac.Spec.TiKV.ScaleInThreshold = pointer.Int32Ptr(5) + } + if tac.Spec.TiKV.ScaleOutThreshold == nil { + tac.Spec.TiKV.ScaleOutThreshold = pointer.Int32Ptr(3) + } + if tac.Spec.TiKV.MetricsTimeDuration == nil { + tac.Spec.TiKV.MetricsTimeDuration = pointer.StringPtr("3m") + } + if tac.Spec.TiKV.ScaleOutIntervalSeconds == nil { + tac.Spec.TiKV.ScaleOutIntervalSeconds = pointer.Int32Ptr(300) + } + if tac.Spec.TiKV.ScaleInIntervalSeconds == nil { + tac.Spec.TiKV.ScaleInIntervalSeconds = pointer.Int32Ptr(500) + } } + if tac.Spec.TiDB != nil { if tac.Spec.TiDB.MinReplicas == nil { tac.Spec.TiDB.MinReplicas = pointer.Int32Ptr(1) @@ -134,5 +160,158 @@ func defaultTAC(tac *v1alpha1.TidbClusterAutoScaler) { if len(tac.Spec.TiDB.Metrics) == 0 { tac.Spec.TiDB.Metrics = append(tac.Spec.TiDB.Metrics, defaultMetricSpec) } + if tac.Spec.TiDB.ScaleInThreshold == nil { + tac.Spec.TiDB.ScaleInThreshold = pointer.Int32Ptr(5) + } + if tac.Spec.TiDB.ScaleOutThreshold == nil { + tac.Spec.TiDB.ScaleOutThreshold = pointer.Int32Ptr(3) + } + if tac.Spec.TiDB.MetricsTimeDuration == nil { + tac.Spec.TiDB.MetricsTimeDuration = pointer.StringPtr("3m") + } + if tac.Spec.TiDB.ScaleOutIntervalSeconds == nil { + tac.Spec.TiDB.ScaleOutIntervalSeconds = pointer.Int32Ptr(300) + } + if tac.Spec.TiDB.ScaleInIntervalSeconds == nil { + tac.Spec.TiDB.ScaleInIntervalSeconds = pointer.Int32Ptr(500) + } + } +} + +// updateConsecutiveCount would update the tc annotation depended by the given replicas in each reconciling +func updateConsecutiveCount(tac *v1alpha1.TidbClusterAutoScaler, + memberType v1alpha1.MemberType, currentReplicas int32, recommendedReplicas int32) error { + if tac.Annotations == nil { + tac.Annotations = map[string]string{} + } + + targetScaleOutAnn := fmt.Sprintf("%s.%s", memberType.String(), annScaleOutSuffix) + targetScaleInAnn := fmt.Sprintf("%s.%s", memberType.String(), annScaleInSuffix) + + var scaleOutCount int + var scaleInCount int + scaleOutCount, scaleInCount = 0, 0 + var err error + + if v, existed := tac.Annotations[targetScaleOutAnn]; existed { + scaleOutCount, err = strconv.Atoi(v) + if err != nil { + return fmt.Errorf(invalidTacAnnotationErrorMsg, tac.Namespace, tac.Name, err) + } + } + + if v, existed := tac.Annotations[targetScaleInAnn]; existed { + scaleInCount, err = strconv.Atoi(v) + if err != nil { + return fmt.Errorf(invalidTacAnnotationErrorMsg, tac.Namespace, tac.Name, err) + } + } + + if currentReplicas < recommendedReplicas { + // scale-out + scaleOutCount = scaleOutCount + 1 + scaleInCount = 0 + } else if currentReplicas > recommendedReplicas { + // scale-in + scaleOutCount = 0 + scaleInCount = scaleInCount + 1 + } else { + scaleOutCount = 0 + scaleInCount = 0 + } + + // update tc annotation + tac.Annotations[targetScaleOutAnn] = fmt.Sprintf("%d", scaleOutCount) + tac.Annotations[targetScaleInAnn] = fmt.Sprintf("%d", scaleInCount) + return nil +} + +func checkConsecutiveCount(tac *v1alpha1.TidbClusterAutoScaler, + memberType v1alpha1.MemberType, currentReplicas int32, recommendedReplicas int32) (bool, error) { + if currentReplicas == recommendedReplicas { + return false, nil + } + targetScaleOutAnn := fmt.Sprintf("%s.%s", memberType.String(), annScaleOutSuffix) + targetScaleInAnn := fmt.Sprintf("%s.%s", memberType.String(), annScaleInSuffix) + if tac.Annotations == nil { + return false, fmt.Errorf(invalidTacAnnotationErrorMsg, tac.Namespace, tac.Name, "tac annotations empty") + } + currentScaleOutCount, err := strconv.ParseInt(tac.Annotations[targetScaleOutAnn], 10, 32) + if err != nil { + return false, err + } + currentScaleInCount, err := strconv.ParseInt(tac.Annotations[targetScaleInAnn], 10, 32) + if err != nil { + return false, err + } + switch memberType { + case v1alpha1.TiDBMemberType: + if currentReplicas < recommendedReplicas { + // scale-out + if int32(currentScaleOutCount) < *tac.Spec.TiDB.ScaleOutThreshold { + return false, nil + } + } else { + // scale-in, no-scaling would be return nil at first + if int32(currentScaleInCount) < *tac.Spec.TiDB.ScaleInThreshold { + return false, nil + } + } + case v1alpha1.TiKVMemberType: + if currentReplicas < recommendedReplicas { + // scale-out + if int32(currentScaleOutCount) < *tac.Spec.TiKV.ScaleOutThreshold { + return false, nil + } + } else { + // scale-in, no-scaling would be return nil at first + if int32(currentScaleInCount) < *tac.Spec.TiDB.ScaleInThreshold { + return false, nil + } + } + default: + return false, fmt.Errorf(invalidMemberTypeErrorMsg, tac.Namespace, tac.Name, memberType) + } + return true, nil +} + +func emptyAutoScalingCountAnn(tac *v1alpha1.TidbClusterAutoScaler, memberType v1alpha1.MemberType) { + targetScaleOutAnn := fmt.Sprintf("%s.%s", memberType.String(), annScaleOutSuffix) + targetScaleInAnn := fmt.Sprintf("%s.%s", memberType.String(), annScaleInSuffix) + if tac.Annotations == nil { + tac.Annotations = map[string]string{} + } + tac.Annotations[targetScaleOutAnn] = "0" + tac.Annotations[targetScaleInAnn] = "0" +} + +//TODO: calculate the recommended replicas from Prometheus +func calculateRecommendedReplicas(tac *v1alpha1.TidbClusterAutoScaler, memberType v1alpha1.MemberType, + client promClient.Client) int32 { + return 0 +} + +func resetAutoScalingAnn(tac *v1alpha1.TidbClusterAutoScaler) { + emptyAutoScalingCountAnn(tac, v1alpha1.TiDBMemberType) + emptyAutoScalingCountAnn(tac, v1alpha1.TiKVMemberType) + tac.Annotations[label.AnnAutoScalingTargetNamespace] = tac.Spec.Cluster.Namespace + tac.Annotations[label.AnnAutoScalingTargetName] = tac.Spec.Cluster.Name +} + +// checkAndUpdateTacRef would compare the target tidbcluster ref stored in the annotations +// and in the Spec. It not equal, the previous stored status would be empty and the stored Ref +// would be updated. +func checkAndUpdateTacAnn(tac *v1alpha1.TidbClusterAutoScaler) { + if tac.Annotations == nil { + tac.Annotations = map[string]string{} + resetAutoScalingAnn(tac) + return + } + name := tac.Annotations[label.AnnAutoScalingTargetName] + namespace := tac.Annotations[label.AnnAutoScalingTargetNamespace] + if name == tac.Spec.Cluster.Name && namespace == tac.Spec.Cluster.Namespace { + return } + // If not satisfied, reset tac Ann + resetAutoScalingAnn(tac) } diff --git a/pkg/autoscaler/autoscaler/util_test.go b/pkg/autoscaler/autoscaler/util_test.go new file mode 100644 index 0000000000..df0f542344 --- /dev/null +++ b/pkg/autoscaler/autoscaler/util_test.go @@ -0,0 +1,198 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package autoscaler + +import ( + "fmt" + "testing" + + . "github.com/onsi/gomega" + "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" + "k8s.io/utils/pointer" +) + +func TestUpdateConsecutiveCount(t *testing.T) { + g := NewGomegaWithT(t) + + type testcase struct { + name string + memberType v1alpha1.MemberType + currentReplicas int32 + recommendedReplicas int32 + currentScaleInCount int32 + currentScaleOutCount int32 + expectedScaleOutAnnValue string + expectedScaleInAnnValue string + } + + testFn := func(test *testcase) { + t.Log(test.name) + tac := newTidbClusterAutoScaler() + tac.Annotations[fmt.Sprintf("%s.%s", test.memberType, annScaleOutSuffix)] = fmt.Sprintf("%d", test.currentScaleOutCount) + tac.Annotations[fmt.Sprintf("%s.%s", test.memberType, annScaleInSuffix)] = fmt.Sprintf("%d", test.currentScaleInCount) + + err := updateConsecutiveCount(tac, test.memberType, test.currentReplicas, test.recommendedReplicas) + g.Expect(err).ShouldNot(HaveOccurred()) + updatedScaleOutCountAnnValue := tac.Annotations[fmt.Sprintf("%s.%s", test.memberType, annScaleOutSuffix)] + updatedScaleInCountAnnValue := tac.Annotations[fmt.Sprintf("%s.%s", test.memberType, annScaleInSuffix)] + g.Expect(updatedScaleOutCountAnnValue).Should(Equal(test.expectedScaleOutAnnValue)) + g.Expect(updatedScaleInCountAnnValue).Should(Equal(test.expectedScaleInAnnValue)) + } + + tests := []testcase{ + { + name: "tikv, no scale", + memberType: v1alpha1.TiKVMemberType, + currentReplicas: 3, + recommendedReplicas: 3, + currentScaleInCount: 1, + currentScaleOutCount: 0, + expectedScaleInAnnValue: "0", + expectedScaleOutAnnValue: "0", + }, + { + name: "tikv, would scale-out,first time", + memberType: v1alpha1.TiKVMemberType, + currentReplicas: 3, + recommendedReplicas: 4, + currentScaleInCount: 1, + currentScaleOutCount: 0, + expectedScaleInAnnValue: "0", + expectedScaleOutAnnValue: "1", + }, + { + name: "tikv, would scale-out,second time", + memberType: v1alpha1.TiKVMemberType, + currentReplicas: 3, + recommendedReplicas: 4, + currentScaleInCount: 0, + currentScaleOutCount: 1, + expectedScaleInAnnValue: "0", + expectedScaleOutAnnValue: "2", + }, + { + name: "tikv, would scale-in, first time", + memberType: v1alpha1.TiKVMemberType, + currentReplicas: 4, + recommendedReplicas: 3, + currentScaleInCount: 0, + currentScaleOutCount: 1, + expectedScaleInAnnValue: "1", + expectedScaleOutAnnValue: "0", + }, + { + name: "tikv, would scale-in, second time", + memberType: v1alpha1.TiKVMemberType, + currentReplicas: 4, + recommendedReplicas: 3, + currentScaleInCount: 1, + currentScaleOutCount: 0, + expectedScaleInAnnValue: "2", + expectedScaleOutAnnValue: "0", + }, + } + for _, test := range tests { + testFn(&test) + } + +} + +func TestCheckConsecutiveCount(t *testing.T) { + g := NewGomegaWithT(t) + + type testcase struct { + name string + memberType v1alpha1.MemberType + currentReplicas int32 + recommendedReplicas int32 + scaleInCount int + scaleOutCount int + ableScale bool + } + + testFn := func(test *testcase) { + t.Log(test.name) + tac := newTidbClusterAutoScaler() + tac.Annotations[fmt.Sprintf("%s.%s", test.memberType, annScaleOutSuffix)] = fmt.Sprintf("%d", test.scaleOutCount) + tac.Annotations[fmt.Sprintf("%s.%s", test.memberType, annScaleInSuffix)] = fmt.Sprintf("%d", test.scaleInCount) + + ableScale, err := checkConsecutiveCount(tac, test.memberType, test.currentReplicas, test.recommendedReplicas) + g.Expect(err).ShouldNot(HaveOccurred()) + g.Expect(ableScale).Should(Equal(test.ableScale)) + } + + tests := []testcase{ + { + name: "tikv success scale-out", + memberType: v1alpha1.TiKVMemberType, + currentReplicas: 3, + recommendedReplicas: 4, + scaleInCount: 0, + scaleOutCount: 2, + ableScale: true, + }, + { + name: "tikv can't scale-out", + memberType: v1alpha1.TiKVMemberType, + currentReplicas: 3, + recommendedReplicas: 4, + scaleInCount: 0, + scaleOutCount: 1, + ableScale: false, + }, + { + name: "tikv success scale-in", + memberType: v1alpha1.TiKVMemberType, + currentReplicas: 4, + recommendedReplicas: 3, + scaleInCount: 2, + scaleOutCount: 0, + ableScale: true, + }, + { + name: "tikv can't scale-in", + memberType: v1alpha1.TiKVMemberType, + currentReplicas: 4, + recommendedReplicas: 3, + scaleOutCount: 1, + scaleInCount: 0, + ableScale: false, + }, + } + + for _, test := range tests { + testFn(&test) + } +} + +func newTidbClusterAutoScaler() *v1alpha1.TidbClusterAutoScaler { + tac := &v1alpha1.TidbClusterAutoScaler{} + tac.Annotations = map[string]string{} + tac.Spec.TiKV = &v1alpha1.TikvAutoScalerSpec{} + tac.Spec.TiDB = &v1alpha1.TidbAutoScalerSpec{} + tac.Spec.TiKV.ScaleOutThreshold = pointer.Int32Ptr(2) + tac.Spec.TiKV.ScaleInThreshold = pointer.Int32Ptr(2) + tac.Spec.TiDB.ScaleOutThreshold = pointer.Int32Ptr(2) + tac.Spec.TiDB.ScaleInThreshold = pointer.Int32Ptr(2) + defaultTAC(tac) + return tac +} + +func newTidbCluster() *v1alpha1.TidbCluster { + tc := &v1alpha1.TidbCluster{} + tc.Annotations = map[string]string{} + tc.Name = "tc" + tc.Namespace = "ns" + return tc +} diff --git a/pkg/label/label.go b/pkg/label/label.go index e51bc3f6f8..c846952314 100644 --- a/pkg/label/label.go +++ b/pkg/label/label.go @@ -102,6 +102,19 @@ const ( // AnnTiKVLastAutoScalingTimestamp is annotation key of tidbclusterto which ordinal is created by tikv auto-scaling AnnTiKVLastAutoScalingTimestamp = "tikv.tidb.pingcap.com/last-autoscaling-timestamp" + // AnnTiDBConsecutiveScaleOutCount describes the least consecutive count to scale-out for tidb + AnnTiDBConsecutiveScaleOutCount = "tidb.tidb.pingcap.com/consecutive-scale-out-count" + // AnnTiDBConsecutiveScaleInCount describes the least consecutive count to scale-in for tidb + AnnTiDBConsecutiveScaleInCount = "tidb.tidb.pingcap.com/consecutive-scale-in-count" + // AnnTiKVConsecutiveScaleOutCount describes the least consecutive count to scale-out for tikv + AnnTiKVConsecutiveScaleOutCount = "tikv.tidb.pingcap.com/consecutive-scale-out-count" + // AnnTiKVConsecutiveScaleInCount describes the least consecutive count to scale-in for tikv + AnnTiKVConsecutiveScaleInCount = "tikv.tidb.pingcap.com/consecutive-scale-in-count" + // AnnAutoScalingTargetName describes the target TidbCluster Ref Name for the TidbCluserAutoScaler + AnnAutoScalingTargetName = "auto-scaling.tidb.pingcap.com/target-name" + // AnnAutoScalingTargetNamespace describes the target TidbCluster Ref Namespace for the TidbCluserAutoScaler + AnnAutoScalingTargetNamespace = "auto-scaling.tidb.pingcap.com/target-namespace" + // PDLabelVal is PD label value PDLabelVal string = "pd" // TiDBLabelVal is TiDB label value