Skip to content

Commit

Permalink
Merge pull request kubernetes#2599 from 44past4/cooldown
Browse files Browse the repository at this point in the history
Add support for scale up/down delay
  • Loading branch information
k8s-ci-robot committed Dec 27, 2019
2 parents a1cd503 + c4e3abf commit dded45b
Show file tree
Hide file tree
Showing 3 changed files with 274 additions and 125 deletions.
6 changes: 5 additions & 1 deletion addon-resizer/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ var (
memoryPerNode = flag.String("extra-memory", "0Mi", "The amount of memory to add per node.")
baseStorage = flag.String("storage", noValue, "The base storage resource requirement.")
storagePerNode = flag.String("extra-storage", "0Gi", "The amount of storage to add per node.")
scaleDownDelay = flag.Duration("scale-down-delay", time.Duration(0), "The time to wait after the addon-resizer start or last scaling operation before the scale down can be performed.")
scaleUpDelay = flag.Duration("scale-up-delay", time.Duration(0), "The time to wait after the addon-resizer start or last scaling operation before the scale up can be performed.")
recommendationOffset = flag.Int("recommendation-offset", 10, "A number from range 0-100. When the dependent's resources are rewritten, they are set to the closer end of the range defined by this percentage threshold.")
acceptanceOffset = flag.Int("acceptance-offset", 20, "A number from range 0-100. The dependent's resources are rewritten when they deviate from expected by a percentage that is higher than this threshold. Can't be lower than recommendation-offset.")
// Flags to identify the container to nanny.
Expand Down Expand Up @@ -184,5 +186,7 @@ func main() {
RecommendationOffset: int64(*recommendationOffset),
Resources: resources,
},
pollPeriod)
pollPeriod,
*scaleDownDelay,
*scaleUpDelay)
}
138 changes: 93 additions & 45 deletions addon-resizer/nanny/nanny_lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,50 +21,66 @@ and update a deployment based on that status.
package nanny

import (
"encoding/json"
"time"

log "github.com/golang/glog"
api "k8s.io/api/core/v1"
)

// operation describes the direction of a proposed resource change,
// as detected by checkResource.
type operation int

const (
	// unknown means the direction could not be determined (or no change is needed).
	unknown operation = iota
	// scaleDown means the actual value is above the acceptable upper bound.
	scaleDown
	// scaleUp means the actual value is below the acceptable lower bound.
	scaleUp
)

// updateResult describes the outcome of a single resource-update pass.
type updateResult int

const (
	// noChange means resources were already acceptable or the check was skipped.
	noChange updateResult = iota
	// postpone means an update was withheld because of the scale up/down delay.
	postpone
	// overwrite means the deployment was updated with new resources.
	overwrite
)

// checkResource determines whether a specific resource needs to be over-written.
func checkResource(estimatorResult *EstimatorResult, actual api.ResourceList, res api.ResourceName) *api.ResourceList {
func checkResource(estimatorResult *EstimatorResult, actual api.ResourceList, res api.ResourceName) (*api.ResourceList, operation) {
val, ok := actual[res]
expMinVal, expMinOk := estimatorResult.AcceptableRange.lower[res]
expMaxVal, expMaxOk := estimatorResult.AcceptableRange.upper[res]
if ok != expMinOk || ok != expMaxOk {
// Something changed, but we don't know whether lower or upper bound should be used.
// It doesn't matter in the long term, so we just pick lower bound arbitrarily here.
return &estimatorResult.RecommendedRange.lower
return &estimatorResult.RecommendedRange.lower, unknown
}
if !ok && !expMinOk && !expMaxOk {
return nil
return nil, unknown
}
if val.Cmp(expMinVal) == -1 {
return &estimatorResult.RecommendedRange.lower
return &estimatorResult.RecommendedRange.lower, scaleUp
}
if val.Cmp(expMaxVal) == 1 {
return &estimatorResult.RecommendedRange.upper
return &estimatorResult.RecommendedRange.upper, scaleDown
}
return nil
return nil, unknown
}

// shouldOverwriteResources determines if we should over-write the container's
// resource limits. We'll over-write the resource limits if the limited
// resources are different, or if any limit is outside of the accepted range.
// Returns null when no resources should be overridden.
// Otherwise, returns ResourceList that should be used.
func shouldOverwriteResources(estimatorResult *EstimatorResult, limits, reqs api.ResourceList) *api.ResourceRequirements {
// shouldOverwriteResources determines if we should over-write the container's resource limits and determines type of the operation.
// We'll over-write the resource limits if the limited resources are different, or if any limit is outside of the accepted range.
// Returns null ResourceRequirements when no resources should be overridden.
// Returned operation type (scale up/down or unknown) is calculated based on values taken from the first resource which requires overwrite.
func shouldOverwriteResources(estimatorResult *EstimatorResult, limits, reqs api.ResourceList) (*api.ResourceRequirements, operation) {
for _, list := range []api.ResourceList{limits, reqs} {
for _, resourceType := range []api.ResourceName{api.ResourceCPU, api.ResourceMemory, api.ResourceStorage} {
newReqs := checkResource(estimatorResult, list, resourceType)
newReqs, op := checkResource(estimatorResult, list, resourceType)
if newReqs != nil {
log.V(4).Infof("Resource %s is out of bounds.", resourceType)
return &api.ResourceRequirements{Limits: *newReqs, Requests: *newReqs}
return &api.ResourceRequirements{Limits: *newReqs, Requests: *newReqs}, op
}
}
}
return nil
return nil, unknown
}

// KubernetesClient is an object that performs the nanny's requisite interactions with Kubernetes.
Expand All @@ -84,46 +100,78 @@ type ResourceEstimator interface {
// PollAPIServer periodically counts the number of nodes, estimates the expected
// ResourceRequirements, compares them to the actual ResourceRequirements, and
// updates the deployment with the expected ResourceRequirements if necessary.
func PollAPIServer(k8s KubernetesClient, est ResourceEstimator, pollPeriod time.Duration) {
func PollAPIServer(k8s KubernetesClient, est ResourceEstimator, pollPeriod, scaleDownDelay, scaleUpDelay time.Duration) {
lastChange := time.Now()
lastResult := noChange

for i := 0; true; i++ {
if i != 0 {
// Sleep for the poll period.
time.Sleep(pollPeriod)
}

// Query the apiserver for the number of nodes.
num, err := k8s.CountNodes()
if num == 0 {
log.V(2).Info("No nodes found, probably listers have not synced yet. Skipping current check.")
continue
if lastResult = updateResources(k8s, est, time.Now(), lastChange, scaleDownDelay, scaleUpDelay, lastResult); lastResult == overwrite {
lastChange = time.Now()
}
if err != nil {
log.Error(err)
continue
}
log.V(4).Infof("The number of nodes is %d", num)
}
}

// Query the apiserver for this pod's information.
resources, err := k8s.ContainerResources()
if err != nil {
log.Errorf("Error while querying apiserver for resources: %v", err)
continue
}
// updateResources counts the number of nodes, estimates the expected
// ResourceRequirements, compares them to the actual ResourceRequirements, and
// updates the deployment with the expected ResourceRequirements if necessary.
// It returns overwrite if deployment has been updated, postpone if the change
// could not be applied due to scale up/down delay and noChange if the estimated
// expected ResourceRequirements are in line with the actual ResourceRequirements.
func updateResources(k8s KubernetesClient, est ResourceEstimator, now, lastChange time.Time, scaleDownDelay, scaleUpDelay time.Duration, prevResult updateResult) updateResult {

// Query the apiserver for the number of nodes.
num, err := k8s.CountNodes()
if num == 0 {
log.V(2).Info("No nodes found, probably listers have not synced yet. Skipping current check.")
return noChange
}
if err != nil {
log.Error(err)
return noChange
}
log.V(4).Infof("The number of nodes is %d", num)

// Get the expected resource limits.
estimation := est.scaleWithNodes(num)
// Query the apiserver for this pod's information.
resources, err := k8s.ContainerResources()
if err != nil {
log.Errorf("Error while querying apiserver for resources: %v", err)
return noChange
}

// If there's a difference, go ahead and set the new values.
overwrite := shouldOverwriteResources(estimation, resources.Limits, resources.Requests)
if overwrite == nil {
log.V(4).Infof("Resources are within the expected limits. Actual: %+v, accepted range: %+v", *resources, estimation.AcceptableRange)
continue
}
// Get the expected resource limits.
estimation := est.scaleWithNodes(num)

log.Infof("Resources are not within the expected limits, updating the deployment. Actual: %+v New: %+v", *resources, *overwrite)
if err := k8s.UpdateDeployment(overwrite); err != nil {
log.Error(err)
continue
}
// If there's a difference, go ahead and set the new values.
overwriteResReq, op := shouldOverwriteResources(estimation, resources.Limits, resources.Requests)
if overwriteResReq == nil {
log.V(4).Infof("Resources are within the expected limits. Actual: %+v, accepted range: %+v", jsonOrValue(*resources), jsonOrValue(estimation.AcceptableRange))
return noChange
}

if (op == scaleDown && now.Before(lastChange.Add(scaleDownDelay))) ||
(op == scaleUp && now.Before(lastChange.Add(scaleUpDelay))) {
log.Infof("Resources are not within the expected limits, Actual: %+v, accepted range: %+v. Skipping resource update because of scale up/down delay", jsonOrValue(*resources), jsonOrValue(estimation.AcceptableRange))
return postpone
}

log.Infof("Resources are not within the expected limits, updating the deployment. Actual: %+v New: %+v", *resources, jsonOrValue(*overwriteResReq))
if err := k8s.UpdateDeployment(overwriteResReq); err != nil {
log.Error(err)
return noChange
}
lastChange = time.Now()
return overwrite
}

// jsonOrValue renders val as a compact JSON string for logging; when val
// cannot be marshalled, the original value is returned unchanged.
// (Also avoids the original's local variable shadowing the `bytes` package name.)
func jsonOrValue(val interface{}) interface{} {
	if encoded, err := json.Marshal(val); err == nil {
		return string(encoded)
	}
	return val
}
Loading

0 comments on commit dded45b

Please sign in to comment.