Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[UCP] Noise reduction support when cluster auto scaling (#2307) #2568

Merged
merged 3 commits into from
May 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions docs/api-references/docs.md
Original file line number Diff line number Diff line change
Expand Up @@ -1623,6 +1623,13 @@ TidbMonitorStatus
</tr>
</tbody>
</table>
<h3 id="autoscalerphase">AutoScalerPhase</h3>
<p>
(<em>Appears on:</em>
<a href="#basicautoscalerstatus">BasicAutoScalerStatus</a>)
</p>
<p>
</p>
<h3 id="brconfig">BRConfig</h3>
<p>
(<em>Appears on:</em>
Expand Down Expand Up @@ -2467,6 +2474,19 @@ to fetch the recommended replicas for TiKV/TiDB</p>
<tbody>
<tr>
<td>
<code>phase</code></br>
<em>
<a href="#autoscalerphase">
AutoScalerPhase
</a>
</em>
</td>
<td>
<p>Phase describes cluster auto scaling phase</p>
</td>
</tr>
<tr>
<td>
<code>metrics</code></br>
<em>
<a href="#metricsstatus">
Expand Down Expand Up @@ -15364,6 +15384,20 @@ BasicAutoScalerSpec
</p>
</td>
</tr>
<tr>
<td>
<code>readyToScaleThresholdSeconds</code></br>
<em>
int32
</em>
</td>
<td>
<em>(Optional)</em>
<p>ReadyToScaleThresholdSeconds represents duration that the ReadyToScale phase
should last for before auto scaling.
If not set, the default ReadyToScaleThresholdSeconds will be set to 30.</p>
</td>
</tr>
</tbody>
</table>
<h3 id="tikvautoscalerstatus">TikvAutoScalerStatus</h3>
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ require (
github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 // indirect
github.com/grpc-ecosystem/grpc-gateway v1.13.0 // indirect
github.com/imdario/mergo v0.3.7 // indirect
github.com/jonboulle/clockwork v0.1.0
github.com/juju/errors v0.0.0-20180806074554-22422dad46e1
github.com/juju/loggo v0.0.0-20180524022052-584905176618 // indirect
github.com/juju/testing v0.0.0-20180920084828-472a3e8b2073 // indirect
Expand Down
7 changes: 7 additions & 0 deletions manifests/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6216,6 +6216,9 @@ spec:
minReplicas:
format: int32
type: integer
readyToScaleThresholdSeconds:
format: int32
type: integer
scaleInIntervalSeconds:
format: int32
type: integer
Expand Down Expand Up @@ -6259,6 +6262,8 @@ spec:
- thresholdValue
type: object
type: array
phase:
type: string
recommendedReplicas:
format: int32
type: integer
Expand Down Expand Up @@ -6288,6 +6293,8 @@ spec:
- thresholdValue
type: object
type: array
phase:
type: string
recommendedReplicas:
format: int32
type: integer
Expand Down
28 changes: 28 additions & 0 deletions pkg/apis/pingcap/v1alpha1/openapi_generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// AutoScalerPhase is the phase of cluster auto scaling reported in
// BasicAutoScalerStatus.Phase.
type AutoScalerPhase string

const (
// NormalAutoScalerPhase means no scaling operation is currently pending.
NormalAutoScalerPhase AutoScalerPhase = "Normal"
// ReadyToScaleOutAutoScalerPhase means a scale-out has been recommended and
// is waiting for the ready-to-scale threshold to elapse.
ReadyToScaleOutAutoScalerPhase AutoScalerPhase = "ReadyToScaleOut"
// ReadyToScaleInAutoScalerPhase means a scale-in has been recommended and
// is waiting for the ready-to-scale threshold to elapse.
ReadyToScaleInAutoScalerPhase AutoScalerPhase = "ReadyToScaleIn"
)

// +genclient
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

Expand Down Expand Up @@ -76,6 +84,12 @@ type TidbClusterAutoScalerSpec struct {
// TikvAutoScalerSpec describes the spec for tikv auto-scaling.
// It inlines BasicAutoScalerSpec and adds the TiKV-specific
// ReadyToScaleThresholdSeconds setting.
type TikvAutoScalerSpec struct {
BasicAutoScalerSpec `json:",inline"`

// ReadyToScaleThresholdSeconds is the number of seconds the autoscaler must
// stay in a ReadyToScale phase (ReadyToScaleOut or ReadyToScaleIn) before
// auto scaling is performed.
// If not set, the default ReadyToScaleThresholdSeconds will be set to 30.
// +optional
ReadyToScaleThresholdSeconds *int32 `json:"readyToScaleThresholdSeconds,omitempty"`
}

// +k8s:openapi-gen=true
Expand Down Expand Up @@ -180,6 +194,8 @@ type TikvAutoScalerStatus struct {
// +k8s:openapi-gen=true
// BasicAutoScalerStatus describe the basic auto-scaling status
type BasicAutoScalerStatus struct {
// Phase describes cluster auto scaling phase
Phase AutoScalerPhase `json:"phase,omitempty"`
// MetricsStatusList describes the metrics status in the last auto-scaling reconciliation
// +optional
MetricsStatusList []MetricsStatus `json:"metrics,omitempty"`
Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pkg/autoscaler/autoscaler/autoscaler_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ func (am *autoScalerManager) updateAutoScaling(oldTc *v1alpha1.TidbCluster,
return nil, nil
}

tac.Annotations[label.AnnLastSyncingTimestamp] = fmt.Sprintf("%d", time.Now().Unix())

if tac.Spec.TiKV != nil {
if oldTc.Status.TiKV.StatefulSet != nil {
tac.Status.TiKV.CurrentReplicas = oldTc.Status.TiKV.StatefulSet.CurrentReplicas
Expand Down
23 changes: 18 additions & 5 deletions pkg/autoscaler/autoscaler/tikv_autoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,24 @@ func (am *autoScalerManager) syncTiKV(tc *v1alpha1.TidbCluster, tac *v1alpha1.Ti
// The currentReplicas of TiKV calculated in auto-scaling is the count of the StateUp TiKV instance, so we need to
// add the number of other state tikv instance replicas when we update the TidbCluster.Spec.TiKV.Replicas
func syncTiKVAfterCalculated(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32, sts *appsv1.StatefulSet) error {

intervalSeconds := tac.Spec.TiKV.ScaleInIntervalSeconds
if recommendedReplicas > tc.Spec.TiKV.Replicas {
intervalSeconds = tac.Spec.TiKV.ScaleOutIntervalSeconds
if recommendedReplicas > currentReplicas {
if tac.Status.TiKV.Phase != v1alpha1.ReadyToScaleOutAutoScalerPhase {
tac.Status.TiKV.Phase = v1alpha1.ReadyToScaleOutAutoScalerPhase
// phase could change from Normal to ReadyToScaleOut, ReadyToScaleIn to ReadyToScaleOut,
// reset timestamp in both cases.
tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
}
} else {
if tac.Status.TiKV.Phase != v1alpha1.ReadyToScaleInAutoScalerPhase {
tac.Status.TiKV.Phase = v1alpha1.ReadyToScaleInAutoScalerPhase
// phase could change from Normal to ReadyToScaleIn, ReadyToScaleOut to ReadyToScaleIn,
// reset timestamp in both cases.
tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
}
}
ableToScale, err := checkStsAutoScalingInterval(tac, *intervalSeconds, v1alpha1.TiKVMemberType)

ableToScale, err := checkStsAutoScaling(tac, *tac.Spec.TiKV.ReadyToScaleThresholdSeconds, *intervalSeconds, v1alpha1.TiKVMemberType)
if err != nil {
return err
}
Expand All @@ -98,7 +110,6 @@ func filterTiKVInstances(tc *v1alpha1.TidbCluster) []string {

// we record the auto-scaling out slot for tikv, in order to add special hot labels when they are created
func updateTcTiKVIfScale(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32, sts *appsv1.StatefulSet) error {
tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
if recommendedReplicas > currentReplicas {
newlyScaleOutOrdinalSets := helper.GetPodOrdinals(recommendedReplicas, sts).Difference(helper.GetPodOrdinals(currentReplicas, sts))
if newlyScaleOutOrdinalSets.Len() > 0 {
Expand All @@ -113,6 +124,8 @@ func updateTcTiKVIfScale(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAuto
tc.Annotations[label.AnnTiKVAutoScalingOutOrdinals] = v
}
}
tac.Status.TiKV.Phase = v1alpha1.NormalAutoScalerPhase
tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] = fmt.Sprintf("%d", time.Now().Unix())
tc.Spec.TiKV.Replicas = recommendedReplicas
tac.Status.TiKV.RecommendedReplicas = &recommendedReplicas
return nil
Expand Down
79 changes: 77 additions & 2 deletions pkg/autoscaler/autoscaler/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ import (
"strconv"
"time"

"github.com/jonboulle/clockwork"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/label"
operatorUtils "github.com/pingcap/tidb-operator/pkg/util"
appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -57,11 +59,81 @@ func checkStsAutoScalingPrerequisites(set *appsv1.StatefulSet) bool {
return true
}

// checkStsAutoScalingInterval would check whether there is enough interval duration between every two auto-scaling
func checkStsAutoScalingInterval(tac *v1alpha1.TidbClusterAutoScaler, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) {
// checkStsAutoScaling reports whether an auto-scaling operation may be
// performed now. It runs three gates in order and stops at the first one that
// refuses or fails:
//
//  1. checkLastSyncingTimestamp: the autoscaler must have been synced within
//     3*controller.ResyncDuration;
//  2. checkStsReadyAutoScalingTimestamp: the ReadyToScale phase must have
//     lasted at least thresholdSeconds;
//  3. checkStsAutoScalingInterval: at least intervalSeconds must separate two
//     auto-scaling operations for the given memberType.
//
// tac.Annotations is initialised to an empty map when nil, because the gates
// read and write annotations on it.
func checkStsAutoScaling(tac *v1alpha1.TidbClusterAutoScaler, thresholdSeconds, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) {
	if tac.Annotations == nil {
		tac.Annotations = map[string]string{}
	}
	clock := clockwork.NewRealClock()

	// 3*controller.ResyncDuration is the maximum time allowed before the phase status is reset.
	if ok, err := checkLastSyncingTimestamp(tac, 3*controller.ResyncDuration, clock); err != nil || !ok {
		return false, err
	}
	if ok, err := checkStsReadyAutoScalingTimestamp(tac, thresholdSeconds, clock); err != nil || !ok {
		return false, err
	}
	ok, err := checkStsAutoScalingInterval(tac, intervalSeconds, memberType)
	if err != nil {
		return false, err
	}
	return ok, nil
}

// checkLastSyncingTimestamp reports whether the last recorded sync of the
// autoscaler (the label.AnnLastSyncingTimestamp annotation, in Unix seconds) is
// recent enough to keep trusting the current phase.
//
// thresholdSec is a time.Duration, not a number of seconds. When the last sync
// is older than thresholdSec, the TiKV phase is reset to Normal and false is
// returned. A missing annotation yields true; an unparsable one yields an error.
func checkLastSyncingTimestamp(tac *v1alpha1.TidbClusterAutoScaler, thresholdSec time.Duration, clock clockwork.Clock) (bool, error) {
	if tac.Annotations == nil {
		tac.Annotations = map[string]string{}
	}

	rawLastSync, found := tac.Annotations[label.AnnLastSyncingTimestamp]
	if !found {
		// NOTE: the sync timestamp is recorded after the auto-scale check, so the
		// annotation does not exist during the first sync; allow scaling then.
		return true, nil
	}

	lastSyncUnix, err := strconv.ParseInt(rawLastSync, 10, 64)
	if err != nil {
		return false, err
	}

	deadline := time.Unix(lastSyncUnix, 0).Add(thresholdSec)
	if !clock.Now().After(deadline) {
		return true, nil
	}

	// No resync happened within thresholdSec: reset the TiKV phase to Normal.
	tac.Status.TiKV.Phase = v1alpha1.NormalAutoScalerPhase
	return false, nil
}

// checkStsReadyAutoScalingTimestamp reports whether the autoscaler has stayed
// in its ReadyToScale phase for at least thresholdSeconds.
//
// The start of the phase is read from the label.AnnTiKVReadyToScaleTimestamp
// annotation (Unix seconds). If the annotation is missing, it is initialised to
// the current time and false is returned, so the waiting window starts now.
// An unparsable annotation value is returned as an error.
func checkStsReadyAutoScalingTimestamp(tac *v1alpha1.TidbClusterAutoScaler, thresholdSeconds int32, clock clockwork.Clock) (bool, error) {
	// Guard against a nil map: writing the annotation below would panic otherwise.
	if tac.Annotations == nil {
		tac.Annotations = map[string]string{}
	}

	readyAutoScalingTimestamp, existed := tac.Annotations[label.AnnTiKVReadyToScaleTimestamp]
	if !existed {
		tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", clock.Now().Unix())
		return false, nil
	}

	// Unix timestamps need 64 bits; a 32-bit parse returns a range error for any
	// time after 2038-01-19.
	t, err := strconv.ParseInt(readyAutoScalingTimestamp, 10, 64)
	if err != nil {
		return false, err
	}

	// Compare as durations instead of narrowing the elapsed seconds to int32,
	// which could overflow for very old timestamps.
	elapsed := clock.Now().Sub(time.Unix(t, 0))
	threshold := time.Duration(thresholdSeconds) * time.Second
	return elapsed >= threshold, nil
}

// checkStsAutoScalingInterval would check whether there is enough interval duration between every two auto-scaling
func checkStsAutoScalingInterval(tac *v1alpha1.TidbClusterAutoScaler, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) {
lastAutoScalingTimestamp, existed := tac.Annotations[label.AnnTiDBLastAutoScalingTimestamp]
if memberType == v1alpha1.TiKVMemberType {
lastAutoScalingTimestamp, existed = tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp]
Expand Down Expand Up @@ -147,6 +219,9 @@ func defaultTAC(tac *v1alpha1.TidbClusterAutoScaler) {
tac.Spec.TiKV.MetricsTimeDuration = pointer.StringPtr("3m")
}
}
if tac.Spec.TiKV.ReadyToScaleThresholdSeconds == nil {
tac.Spec.TiKV.ReadyToScaleThresholdSeconds = pointer.Int32Ptr(30)
}
}

if tac.Spec.TiDB != nil {
Expand Down
Loading