diff --git a/controllers/autoscaling.go b/controllers/autoscaling.go index 0035ca9c87..4e9edd5535 100644 --- a/controllers/autoscaling.go +++ b/controllers/autoscaling.go @@ -34,7 +34,7 @@ func getValueAvailableAt(now time.Time, from, to *time.Time, reservedValue int) return &reservedValue } -func (r *HorizontalRunnerAutoscalerReconciler) getDesiredReplicasFromCache(hra v1alpha1.HorizontalRunnerAutoscaler) *int { +func (r *HorizontalRunnerAutoscalerReconciler) fetchSuggestedReplicasFromCache(hra v1alpha1.HorizontalRunnerAutoscaler) *int { var entry *v1alpha1.CacheEntry for i := range hra.Status.CacheEntries { @@ -63,7 +63,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) getDesiredReplicasFromCache(hra v return nil } -func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) { +func (r *HorizontalRunnerAutoscalerReconciler) suggestDesiredReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) { if hra.Spec.MinReplicas == nil { return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing minReplicas", hra.Namespace, hra.Name) } else if hra.Spec.MaxReplicas == nil { @@ -73,20 +73,20 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp metrics := hra.Spec.Metrics if len(metrics) == 0 { if len(hra.Spec.ScaleUpTriggers) == 0 { - return r.calculateReplicasByQueuedAndInProgressWorkflowRuns(rd, hra) + return r.suggestReplicasByQueuedAndInProgressWorkflowRuns(rd, hra) } - return hra.Spec.MinReplicas, nil + return nil, nil } else if metrics[0].Type == v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns { - return r.calculateReplicasByQueuedAndInProgressWorkflowRuns(rd, hra) + return r.suggestReplicasByQueuedAndInProgressWorkflowRuns(rd, hra) } else if metrics[0].Type == v1alpha1.AutoscalingMetricTypePercentageRunnersBusy { - return r.calculateReplicasByPercentageRunnersBusy(rd, hra) + return r.suggestReplicasByPercentageRunnersBusy(rd, hra) } else { return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q", metrics[0].Type) } } -func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByQueuedAndInProgressWorkflowRuns(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) { +func (r *HorizontalRunnerAutoscalerReconciler) suggestReplicasByQueuedAndInProgressWorkflowRuns(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) { var repos [][]string metrics := hra.Spec.Metrics @@ -101,7 +101,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByQueuedAndInPro // we assume that the desired replicas should always be `minReplicas + capacityReservedThroughWebhook`. // See https://github.com/summerwind/actions-runner-controller/issues/377#issuecomment-793372693 if len(metrics) == 0 { - return hra.Spec.MinReplicas, nil + return nil, nil } if len(metrics[0].RepositoryNames) == 0 { @@ -178,28 +178,10 @@ func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByQueuedAndInPro } } - minReplicas := *hra.Spec.MinReplicas - maxReplicas := *hra.Spec.MaxReplicas necessaryReplicas := queued + inProgress - var desiredReplicas int - - if necessaryReplicas < minReplicas { - desiredReplicas = minReplicas - } else if necessaryReplicas > maxReplicas { - desiredReplicas = maxReplicas - } else { - desiredReplicas = necessaryReplicas - } - - rd.Status.Replicas = &desiredReplicas - replicas := desiredReplicas - r.Log.V(1).Info( - "Calculated desired replicas", - "computed_replicas_desired", desiredReplicas, - "spec_replicas_min", minReplicas, - "spec_replicas_max", maxReplicas, + fmt.Sprintf("Suggested desired replicas of %d by TotalNumberOfQueuedAndInProgressWorkflowRuns", necessaryReplicas), "workflow_runs_completed", completed, "workflow_runs_in_progress", inProgress, "workflow_runs_queued", queued, @@ -209,13 +191,11 @@ func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByQueuedAndInPro "horizontal_runner_autoscaler", hra.Name, ) - return &replicas, nil + return &necessaryReplicas, nil } -func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByPercentageRunnersBusy(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) { +func (r *HorizontalRunnerAutoscalerReconciler) suggestReplicasByPercentageRunnersBusy(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) { ctx := context.Background() - minReplicas := *hra.Spec.MinReplicas - maxReplicas := *hra.Spec.MaxReplicas metrics := hra.Spec.Metrics[0] scaleUpThreshold := defaultScaleUpThreshold scaleDownThreshold := defaultScaleDownThreshold @@ -363,21 +343,13 @@ func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByPercentageRunn desiredReplicas = *rd.Spec.Replicas } - if desiredReplicas < minReplicas { - desiredReplicas = minReplicas - } else if desiredReplicas > maxReplicas { - desiredReplicas = maxReplicas - } - // NOTES for operators: // // - num_runners can be as twice as large as replicas_desired_before while // the runnerdeployment controller is replacing RunnerReplicaSet for runner update. r.Log.V(1).Info( - "Calculated desired replicas", - "replicas_min", minReplicas, - "replicas_max", maxReplicas, + fmt.Sprintf("Suggested desired replicas of %d by PercentageRunnersBusy", desiredReplicas), "replicas_desired_before", desiredReplicasBefore, "replicas_desired", desiredReplicas, "num_runners", numRunners, @@ -391,8 +363,5 @@ func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByPercentageRunn "repository", repository, ) - rd.Status.Replicas = &desiredReplicas - replicas := desiredReplicas - - return &replicas, nil + return &desiredReplicas, nil } diff --git a/controllers/autoscaling_test.go b/controllers/autoscaling_test.go index 50d458fa97..01921d9483 100644 --- a/controllers/autoscaling_test.go +++ b/controllers/autoscaling_test.go @@ -224,7 +224,7 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) { }, } - got, err := h.computeReplicas(rd, hra) + got, _, _, err := h.computeReplicasWithCache(log, metav1Now.Time, rd, hra) if err != nil { if tc.err == "" { t.Fatalf("unexpected error: expected none, got %v", err) @@ -234,12 +234,8 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) { return } - if got == nil { - t.Fatalf("unexpected value of rs.Spec.Replicas: nil") - } - - if *got != tc.want { - t.Errorf("%d: incorrect desired replicas: want %d, got %d", i, tc.want, *got) + if got != tc.want { + t.Errorf("%d: incorrect desired replicas: want %d, got %d", i, tc.want, got) } }) } @@ -424,6 +420,8 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) { _ = v1alpha1.AddToScheme(scheme) t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) { + t.Helper() + server := fake.NewServer( fake.WithListRepositoryWorkflowRunsResponse(200, tc.workflowRuns, tc.workflowRuns_queued, tc.workflowRuns_in_progress), fake.WithListWorkflowJobsResponse(200, tc.workflowJobs), @@ -485,7 +483,7 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) { }, } - got, err := h.computeReplicas(rd, hra) + got, _, _, err := h.computeReplicasWithCache(log, metav1Now.Time, rd, hra) if err != nil { if tc.err == "" { t.Fatalf("unexpected error: expected none, got %v", err) @@ -495,12 +493,8 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) { return } - if got == nil { - t.Fatalf("unexpected value of rs.Spec.Replicas: nil, wanted %v", tc.want) - } - - if *got != tc.want { - t.Errorf("%d: incorrect desired replicas: want %d, got %d", i, tc.want, *got) + if got != tc.want { + t.Errorf("%d: incorrect desired replicas: want %d, got %d", i, tc.want, got) } }) } diff --git a/controllers/horizontalrunnerautoscaler_controller.go b/controllers/horizontalrunnerautoscaler_controller.go index ce2959c0a3..a64b83b13f 100644 --- a/controllers/horizontalrunnerautoscaler_controller.go +++ b/controllers/horizontalrunnerautoscaler_controller.go @@ -19,6 +19,7 @@ package controllers import ( "context" "fmt" + corev1 "k8s.io/api/core/v1" "time" "github.com/summerwind/actions-runner-controller/github" @@ -30,7 +31,6 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/summerwind/actions-runner-controller/api/v1alpha1" @@ -52,6 +52,8 @@ type HorizontalRunnerAutoscalerReconciler struct { Name string } +const defaultReplicas = 1 + // +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerdeployments,verbs=get;list;watch;update;patch // +kubebuilder:rbac:groups=actions.summerwind.dev,resources=horizontalrunnerautoscalers,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=actions.summerwind.dev,resources=horizontalrunnerautoscalers/finalizers,verbs=get;list;watch;create;update;patch;delete @@ -83,41 +85,18 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl return ctrl.Result{}, nil } - var replicas *int - - replicasFromCache := r.getDesiredReplicasFromCache(hra) - - if replicasFromCache != nil { - replicas = replicasFromCache - } else { - var err error + now := time.Now() - replicas, err = r.computeReplicas(rd, hra) - if err != nil { - r.Recorder.Event(&hra, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error()) + newDesiredReplicas, computedReplicas, computedReplicasFromCache, err := r.computeReplicasWithCache(log, now, rd, hra) + if err != nil { + r.Recorder.Event(&hra, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error()) - log.Error(err, "Could not compute replicas") + log.Error(err, "Could not compute replicas") - return ctrl.Result{}, err - } + return ctrl.Result{}, err } - const defaultReplicas = 1 - currentDesiredReplicas := getIntOrDefault(rd.Spec.Replicas, defaultReplicas) - newDesiredReplicas := getIntOrDefault(replicas, defaultReplicas) - - now := time.Now() - - for _, reservation := range hra.Spec.CapacityReservations { - if reservation.ExpirationTime.Time.After(now) { - newDesiredReplicas += reservation.Replicas - } - } - - if hra.Spec.MaxReplicas != nil && *hra.Spec.MaxReplicas < newDesiredReplicas { - newDesiredReplicas = *hra.Spec.MaxReplicas - } // Please add more conditions that we can in-place update the newest runnerreplicaset without disruption if currentDesiredReplicas != newDesiredReplicas { @@ -143,7 +122,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl updated.Status.DesiredReplicas = &newDesiredReplicas } - if replicasFromCache == nil { + if computedReplicasFromCache == nil { if updated == nil { updated = hra.DeepCopy() } @@ -160,7 +139,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl updated.Status.CacheEntries = append(cacheEntries, v1alpha1.CacheEntry{ Key: v1alpha1.CacheEntryKeyDesiredReplicas, - Value: *replicas, + Value: computedReplicas, ExpirationTime: metav1.Time{Time: time.Now().Add(cacheDuration)}, }) } @@ -200,14 +179,59 @@ func (r *HorizontalRunnerAutoscalerReconciler) SetupWithManager(mgr ctrl.Manager Complete(r) } -func (r *HorizontalRunnerAutoscalerReconciler) computeReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) { - var computedReplicas *int +func (r *HorizontalRunnerAutoscalerReconciler) computeReplicasWithCache(log logr.Logger, now time.Time, rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (int, int, *int, error) { + minReplicas := defaultReplicas + if hra.Spec.MinReplicas != nil && *hra.Spec.MinReplicas > 0 { + minReplicas = *hra.Spec.MinReplicas + } - replicas, err := r.determineDesiredReplicas(rd, hra) - if err != nil { - return nil, err + var suggestedReplicas int + + suggestedReplicasFromCache := r.fetchSuggestedReplicasFromCache(hra) + + var cached *int + + if suggestedReplicasFromCache != nil { + cached = suggestedReplicasFromCache + + if cached == nil { + suggestedReplicas = minReplicas + } else { + suggestedReplicas = *cached + } + } else { + v, err := r.suggestDesiredReplicas(rd, hra) + if err != nil { + return 0, 0, nil, err + } + + if v == nil { + suggestedReplicas = minReplicas + } else { + suggestedReplicas = *v + } + } + + var reserved int + + for _, reservation := range hra.Spec.CapacityReservations { + if reservation.ExpirationTime.Time.After(now) { + reserved += reservation.Replicas + } + } + + newDesiredReplicas := suggestedReplicas + reserved + + if newDesiredReplicas < minReplicas { + newDesiredReplicas = minReplicas + } else if hra.Spec.MaxReplicas != nil && newDesiredReplicas > *hra.Spec.MaxReplicas { + newDesiredReplicas = *hra.Spec.MaxReplicas } + // + // Delay scaling-down for ScaleDownDelaySecondsAfterScaleUp or DefaultScaleDownDelay + // + var scaleDownDelay time.Duration if hra.Spec.ScaleDownDelaySecondsAfterScaleUp != nil { @@ -216,17 +240,50 @@ func (r *HorizontalRunnerAutoscalerReconciler) computeReplicas(rd v1alpha1.Runne scaleDownDelay = DefaultScaleDownDelay } - now := time.Now() + var scaleDownDelayUntil *time.Time if hra.Status.DesiredReplicas == nil || - *hra.Status.DesiredReplicas < *replicas || - hra.Status.LastSuccessfulScaleOutTime == nil || - hra.Status.LastSuccessfulScaleOutTime.Add(scaleDownDelay).Before(now) { + *hra.Status.DesiredReplicas < newDesiredReplicas || + hra.Status.LastSuccessfulScaleOutTime == nil { - computedReplicas = replicas + } else if hra.Status.LastSuccessfulScaleOutTime != nil { + t := hra.Status.LastSuccessfulScaleOutTime.Add(scaleDownDelay) + + // ScaleDownDelay is not passed + if t.After(now) { + scaleDownDelayUntil = &t + newDesiredReplicas = *hra.Status.DesiredReplicas + } } else { - computedReplicas = hra.Status.DesiredReplicas + newDesiredReplicas = *hra.Status.DesiredReplicas + } + + // + // Logs various numbers for monitoring and debugging purpose + // + + kvs := []interface{}{ + "suggested", suggestedReplicas, + "reserved", reserved, + "min", minReplicas, } - return computedReplicas, nil + if cached != nil { + kvs = append(kvs, "cached", *cached) + } + + if scaleDownDelayUntil != nil { + kvs = append(kvs, "last_scale_up_time", *hra.Status.LastSuccessfulScaleOutTime) + kvs = append(kvs, "scale_down_delay_until", scaleDownDelayUntil) + } + + if maxReplicas := hra.Spec.MaxReplicas; maxReplicas != nil { + kvs = append(kvs, "max", *maxReplicas) + } + + log.V(1).Info(fmt.Sprintf("Calculated desired replicas of %d", newDesiredReplicas), + kvs..., + ) + + return newDesiredReplicas, suggestedReplicas, suggestedReplicasFromCache, nil }