Skip to content

Commit

Permalink
auto scaling noise reduction support
Browse files Browse the repository at this point in the history
* add ReadyToScaleThresholdSeconds in AutoScalerSpec
* add AnnTiKVReadyToScaleTimestamp timestamp labels to record AutoScalerPhase
timestamp
* add three AutoScalerPhase values: Normal, ReadyToScaleOut and ReadyToScaleIn
* add checkStsReadyAutoScalingTimestamp to check AutoScalerPhase
timestamp, only for TiKV
* add checkStsLastSyncTimestamp to check maximum thresholdSec allowed
before reset phase to Normal, only for TiKV
* add checkStsAutoScaling combine checkStsLastSyncTimestamp,
checkStsReadyAutoScalingTimestamp and checkStsAutoScalingInterval
* add unit tests
* add integration e2e tests
* update doc
  • Loading branch information
vincent178 committed May 22, 2020
1 parent 08dc79c commit 305220a
Show file tree
Hide file tree
Showing 12 changed files with 370 additions and 21 deletions.
34 changes: 34 additions & 0 deletions docs/api-references/docs.md
Original file line number Diff line number Diff line change
Expand Up @@ -1599,6 +1599,13 @@ TidbMonitorStatus
</tr>
</tbody>
</table>
<h3 id="autoscalerphase">AutoScalerPhase</h3>
<p>
(<em>Appears on:</em>
<a href="#basicautoscalerstatus">BasicAutoScalerStatus</a>)
</p>
<p>
</p>
<h3 id="brconfig">BRConfig</h3>
<p>
(<em>Appears on:</em>
Expand Down Expand Up @@ -2431,6 +2438,19 @@ to fetch the recommended replicas for TiKV/TiDB</p>
<tbody>
<tr>
<td>
<code>phase</code></br>
<em>
<a href="#autoscalerphase">
AutoScalerPhase
</a>
</em>
</td>
<td>
<p>Phase describes cluster auto scaling phase</p>
</td>
</tr>
<tr>
<td>
<code>metrics</code></br>
<em>
<a href="#metricsstatus">
Expand Down Expand Up @@ -15135,6 +15155,20 @@ BasicAutoScalerSpec
</p>
</td>
</tr>
<tr>
<td>
<code>readyToScaleThresholdSeconds</code></br>
<em>
int32
</em>
</td>
<td>
<em>(Optional)</em>
<p>ReadyToScaleThresholdSeconds represents duration that the ReadyToScale phase
should last for before auto scaling.
If not set, the default ReadyToScaleThresholdSeconds will be set to 300.</p>
</td>
</tr>
</tbody>
</table>
<h3 id="tikvautoscalerstatus">TikvAutoScalerStatus</h3>
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ require (
github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 // indirect
github.com/grpc-ecosystem/grpc-gateway v1.13.0 // indirect
github.com/imdario/mergo v0.3.7 // indirect
github.com/jonboulle/clockwork v0.1.0
github.com/juju/errors v0.0.0-20180806074554-22422dad46e1
github.com/juju/loggo v0.0.0-20180524022052-584905176618 // indirect
github.com/juju/testing v0.0.0-20180920084828-472a3e8b2073 // indirect
Expand Down
7 changes: 7 additions & 0 deletions manifests/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6246,6 +6246,9 @@ spec:
minReplicas:
format: int32
type: integer
readyToScaleThresholdSeconds:
format: int32
type: integer
scaleInIntervalSeconds:
format: int32
type: integer
Expand Down Expand Up @@ -6289,6 +6292,8 @@ spec:
- thresholdValue
type: object
type: array
phase:
type: string
recommendedReplicas:
format: int32
type: integer
Expand Down Expand Up @@ -6318,6 +6323,8 @@ spec:
- thresholdValue
type: object
type: array
phase:
type: string
recommendedReplicas:
format: int32
type: integer
Expand Down
28 changes: 28 additions & 0 deletions pkg/apis/pingcap/v1alpha1/openapi_generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// AutoScalerPhase is the auto-scaling phase recorded in
// BasicAutoScalerStatus.Phase.
type AutoScalerPhase string

const (
// NormalAutoScalerPhase is the phase set once a TiKV scaling decision has
// been applied, or when the pending phase is reset because the autoscaler
// was not synced in time.
NormalAutoScalerPhase AutoScalerPhase = "Normal"
// ReadyToScaleOutAutoScalerPhase is the phase set for TiKV when the
// recommended replicas exceed the current replicas; scaling is deferred
// until the phase has lasted long enough.
ReadyToScaleOutAutoScalerPhase AutoScalerPhase = "ReadyToScaleOut"
// ReadyToScaleInAutoScalerPhase is the phase set for TiKV when the
// recommended replicas do not exceed the current replicas and a scale
// is still being considered.
ReadyToScaleInAutoScalerPhase AutoScalerPhase = "ReadyToScaleIn"
)

// +genclient
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

Expand Down Expand Up @@ -76,6 +84,12 @@ type TidbClusterAutoScalerSpec struct {
// TikvAutoScalerSpec describes the spec for tikv auto-scaling
type TikvAutoScalerSpec struct {
BasicAutoScalerSpec `json:",inline"`

// ReadyToScaleThresholdSeconds is the minimum number of seconds the
// autoscaler must stay in a ReadyToScaleOut or ReadyToScaleIn phase
// before the scaling is actually performed. It filters out short-lived
// metric spikes.
// If not set, ReadyToScaleThresholdSeconds defaults to 300.
// +optional
ReadyToScaleThresholdSeconds *int32 `json:"readyToScaleThresholdSeconds,omitempty"`
}

// +k8s:openapi-gen=true
Expand Down Expand Up @@ -180,6 +194,8 @@ type TikvAutoScalerStatus struct {
// +k8s:openapi-gen=true
// BasicAutoScalerStatus describe the basic auto-scaling status
type BasicAutoScalerStatus struct {
// Phase describes cluster auto scaling phase
Phase AutoScalerPhase `json:"phase,omitempty"`
// MetricsStatusList describes the metrics status in the last auto-scaling reconciliation
// +optional
MetricsStatusList []MetricsStatus `json:"metrics,omitempty"`
Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pkg/autoscaler/autoscaler/autoscaler_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ func (am *autoScalerManager) updateAutoScaling(oldTc *v1alpha1.TidbCluster,
return nil, nil
}

tac.Annotations[label.AnnLastSyncingTimestamp] = fmt.Sprintf("%d", time.Now().Unix())

if tac.Spec.TiKV != nil {
if oldTc.Status.TiKV.StatefulSet != nil {
tac.Status.TiKV.CurrentReplicas = oldTc.Status.TiKV.StatefulSet.CurrentReplicas
Expand Down
23 changes: 18 additions & 5 deletions pkg/autoscaler/autoscaler/tikv_autoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,24 @@ func (am *autoScalerManager) syncTiKV(tc *v1alpha1.TidbCluster, tac *v1alpha1.Ti
// The currentReplicas of TiKV calculated in auto-scaling is the count of the StateUp TiKV instance, so we need to
// add the number of other state tikv instance replicas when we update the TidbCluster.Spec.TiKV.Replicas
func syncTiKVAfterCalculated(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32, sts *appsv1.StatefulSet) error {

intervalSeconds := tac.Spec.TiKV.ScaleInIntervalSeconds
if recommendedReplicas > tc.Spec.TiKV.Replicas {
intervalSeconds = tac.Spec.TiKV.ScaleOutIntervalSeconds
if recommendedReplicas > currentReplicas {
if tac.Status.TiKV.Phase != v1alpha1.ReadyToScaleOutAutoScalerPhase {
tac.Status.TiKV.Phase = v1alpha1.ReadyToScaleOutAutoScalerPhase
// phase could change from Normal to ReadyToScaleOut, ReadyToScaleIn to ReadyToScaleOut,
// reset timestamp in both cases.
tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
}
} else {
if tac.Status.TiKV.Phase != v1alpha1.ReadyToScaleInAutoScalerPhase {
tac.Status.TiKV.Phase = v1alpha1.ReadyToScaleInAutoScalerPhase
// phase could change from Normal to ReadyToScaleIn, ReadyToScaleOut to ReadyToScaleIn,
// reset timestamp in both cases.
tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
}
}
ableToScale, err := checkStsAutoScalingInterval(tac, *intervalSeconds, v1alpha1.TiKVMemberType)

ableToScale, err := checkStsAutoScaling(tac, *tac.Spec.TiKV.ReadyToScaleThresholdSeconds, *intervalSeconds, v1alpha1.TiKVMemberType)
if err != nil {
return err
}
Expand All @@ -98,7 +110,6 @@ func filterTiKVInstances(tc *v1alpha1.TidbCluster) []string {

// we record the auto-scaling out slot for tikv, in order to add special hot labels when they are created
func updateTcTiKVIfScale(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32, sts *appsv1.StatefulSet) error {
tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
if recommendedReplicas > currentReplicas {
newlyScaleOutOrdinalSets := helper.GetPodOrdinals(recommendedReplicas, sts).Difference(helper.GetPodOrdinals(currentReplicas, sts))
if newlyScaleOutOrdinalSets.Len() > 0 {
Expand All @@ -113,6 +124,8 @@ func updateTcTiKVIfScale(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAuto
tc.Annotations[label.AnnTiKVAutoScalingOutOrdinals] = v
}
}
tac.Status.TiKV.Phase = v1alpha1.NormalAutoScalerPhase
tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
tc.Spec.TiKV.Replicas = recommendedReplicas
tac.Status.TiKV.RecommendedReplicas = &recommendedReplicas
return nil
Expand Down
79 changes: 77 additions & 2 deletions pkg/autoscaler/autoscaler/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ import (
"strconv"
"time"

"github.com/jonboulle/clockwork"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/label"
operatorUtils "github.com/pingcap/tidb-operator/pkg/util"
appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -57,11 +59,81 @@ func checkStsAutoScalingPrerequisites(set *appsv1.StatefulSet) bool {
return true
}

// checkStsAutoScalingInterval would check whether there is enough interval duration between every two auto-scaling
func checkStsAutoScalingInterval(tac *v1alpha1.TidbClusterAutoScaler, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) {
// checkStsAutoScaling reports whether an auto-scaling operation may be applied
// right now. It chains three gates and stops at the first one that fails or
// errors:
//
//  1. checkLastSyncingTimestamp: the autoscaler has been synced recently enough,
//  2. checkStsReadyAutoScalingTimestamp: the ready-to-scale phase has lasted at
//     least thresholdSeconds,
//  3. checkStsAutoScalingInterval: intervalSeconds have passed since the last
//     scaling of memberType.
//
// tac.Annotations is initialised to an empty map when nil, since the gates read
// and write annotations.
func checkStsAutoScaling(tac *v1alpha1.TidbClusterAutoScaler, thresholdSeconds, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) {
	if tac.Annotations == nil {
		tac.Annotations = map[string]string{}
	}
	clock := clockwork.NewRealClock()

	// 3*controller.ResyncDuration is maximum time allowed before reset phase status
	if ok, err := checkLastSyncingTimestamp(tac, 3*controller.ResyncDuration, clock); err != nil || !ok {
		return false, err
	}
	if ok, err := checkStsReadyAutoScalingTimestamp(tac, thresholdSeconds, clock); err != nil || !ok {
		return false, err
	}
	if ok, err := checkStsAutoScalingInterval(tac, intervalSeconds, memberType); err != nil || !ok {
		return false, err
	}
	return true, nil
}

// checkLastSyncingTimestamp reports whether the autoscaler was synced recently
// enough for its pending TiKV phase to still be meaningful.
//
// It reads the label.AnnLastSyncingTimestamp annotation (Unix seconds), which
// is the timestamp written on every sync. If more than thresholdSec has passed
// since that timestamp, the TiKV phase is reset to Normal and false is
// returned, so that a stale ReadyToScale phase cannot trigger a scale.
//
// It returns (true, nil) when the annotation is absent, and a non-nil error
// when the annotation is not a valid integer.
func checkLastSyncingTimestamp(tac *v1alpha1.TidbClusterAutoScaler, thresholdSec time.Duration, clock clockwork.Clock) (bool, error) {
	if tac.Annotations == nil {
		tac.Annotations = map[string]string{}
	}

	// Use the last *syncing* timestamp, not the last auto-scaling timestamp:
	// keying on the latter would block every scale once the previous scaling
	// is older than thresholdSec.
	lastSyncingTimestamp, existed := tac.Annotations[label.AnnLastSyncingTimestamp]
	if !existed {
		// NOTE: because record autoscaler sync timestamp happens after check auto scale,
		// label will not exist during first sync, return allow auto scale in this case.
		return true, nil
	}
	t, err := strconv.ParseInt(lastSyncingTimestamp, 10, 64)
	if err != nil {
		return false, err
	}
	// if there's no resync within thresholdSec, reset TiKV phase to Normal
	if clock.Now().After(time.Unix(t, 0).Add(thresholdSec)) {
		tac.Status.TiKV.Phase = v1alpha1.NormalAutoScalerPhase
		return false, nil
	}
	return true, nil
}

// checkStsReadyAutoScalingTimestamp reports whether the TiKV ready-to-scale
// phase has lasted for at least thresholdSeconds.
//
// It reads the label.AnnTiKVReadyToScaleTimestamp annotation (Unix seconds).
// When the annotation is absent it is initialised to the current time and
// false is returned, so the waiting window starts now. A non-nil error is
// returned when the annotation is not a valid integer.
func checkStsReadyAutoScalingTimestamp(tac *v1alpha1.TidbClusterAutoScaler, thresholdSeconds int32, clock clockwork.Clock) (bool, error) {
	// Guard the map write below; writing to a nil map panics.
	if tac.Annotations == nil {
		tac.Annotations = map[string]string{}
	}

	readyAutoScalingTimestamp, existed := tac.Annotations[label.AnnTiKVReadyToScaleTimestamp]
	if !existed {
		tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", clock.Now().Unix())
		return false, nil
	}

	// Unix timestamps need 64 bits; a 32-bit parse fails after 2038-01-19.
	t, err := strconv.ParseInt(readyAutoScalingTimestamp, 10, 64)
	if err != nil {
		return false, err
	}

	// Compare as durations instead of narrowing the elapsed seconds to int32,
	// which is not well defined for large values. Truncating to whole seconds
	// keeps the same comparison granularity as before.
	elapsed := clock.Now().Sub(time.Unix(t, 0)).Truncate(time.Second)
	threshold := time.Duration(thresholdSeconds) * time.Second
	return elapsed >= threshold, nil
}

// checkStsAutoScalingInterval would check whether there is enough interval duration between every two auto-scaling
func checkStsAutoScalingInterval(tac *v1alpha1.TidbClusterAutoScaler, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) {
lastAutoScalingTimestamp, existed := tac.Annotations[label.AnnTiDBLastAutoScalingTimestamp]
if memberType == v1alpha1.TiKVMemberType {
lastAutoScalingTimestamp, existed = tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp]
Expand Down Expand Up @@ -147,6 +219,9 @@ func defaultTAC(tac *v1alpha1.TidbClusterAutoScaler) {
tac.Spec.TiKV.MetricsTimeDuration = pointer.StringPtr("3m")
}
}
if tac.Spec.TiKV.ReadyToScaleThresholdSeconds == nil {
tac.Spec.TiKV.ReadyToScaleThresholdSeconds = pointer.Int32Ptr(300)
}
}

if tac.Spec.TiDB != nil {
Expand Down
Loading

0 comments on commit 305220a

Please sign in to comment.