Skip to content

Commit

Permalink
Merge tag 'sched-urgent-2024-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Browse files Browse the repository at this point in the history

Pull scheduler fixes from Ingo Molnar:

 - Fix EEVDF corner cases

 - Fix two nohz_full= related bugs that can cause boot crashes
   and warnings

* tag 'sched-urgent-2024-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/isolation: Fix boot crash when maxcpus < first housekeeping CPU
  sched/isolation: Prevent boot crash when the boot CPU is nohz_full
  sched/eevdf: Prevent vlag from going out of bounds in reweight_eevdf()
  sched/eevdf: Fix miscalculation in reweight_entity() when se is not curr
  sched/eevdf: Always update V if se->on_rq when reweighting
  • Loading branch information
torvalds committed Apr 28, 2024
2 parents aec147c + 257bf89 commit 245c8e8
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 21 deletions.
7 changes: 2 additions & 5 deletions Documentation/timers/no_hz.rst
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,8 @@ adaptive-tick CPUs: At least one non-adaptive-tick CPU must remain
online to handle timekeeping tasks in order to ensure that system
calls like gettimeofday() return accurate values on adaptive-tick CPUs.
(This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no running
user processes to observe slight drifts in clock rate.) Therefore, the
boot CPU is prohibited from entering adaptive-ticks mode. Specifying a
"nohz_full=" mask that includes the boot CPU will result in a boot-time
error message, and the boot CPU will be removed from the mask. Note that
this means that your system must have at least two CPUs in order for
user processes to observe slight drifts in clock rate.) Note that this
means that your system must have at least two CPUs in order for
CONFIG_NO_HZ_FULL=y to do anything for you.

Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded.
Expand Down
34 changes: 20 additions & 14 deletions kernel/sched/fair.c
Original file line number Diff line number Diff line change
Expand Up @@ -696,15 +696,21 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
*
* XXX could add max_slice to the augmented data to track this.
*/
static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
static s64 entity_lag(u64 avruntime, struct sched_entity *se)
{
s64 lag, limit;
s64 vlag, limit;

vlag = avruntime - se->vruntime;
limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);

return clamp(vlag, -limit, limit);
}

static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
SCHED_WARN_ON(!se->on_rq);
lag = avg_vruntime(cfs_rq) - se->vruntime;

limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
se->vlag = clamp(lag, -limit, limit);
se->vlag = entity_lag(avg_vruntime(cfs_rq), se);
}

/*
Expand Down Expand Up @@ -3676,11 +3682,10 @@ static inline void
dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
#endif

static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
static void reweight_eevdf(struct sched_entity *se, u64 avruntime,
unsigned long weight)
{
unsigned long old_weight = se->load.weight;
u64 avruntime = avg_vruntime(cfs_rq);
s64 vlag, vslice;

/*
Expand Down Expand Up @@ -3761,7 +3766,7 @@ static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
* = V - vl'
*/
if (avruntime != se->vruntime) {
vlag = (s64)(avruntime - se->vruntime);
vlag = entity_lag(avruntime, se);
vlag = div_s64(vlag * old_weight, weight);
se->vruntime = avruntime - vlag;
}
Expand All @@ -3787,25 +3792,26 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
unsigned long weight)
{
bool curr = cfs_rq->curr == se;
u64 avruntime;

if (se->on_rq) {
/* commit outstanding execution time */
if (curr)
update_curr(cfs_rq);
else
update_curr(cfs_rq);
avruntime = avg_vruntime(cfs_rq);
if (!curr)
__dequeue_entity(cfs_rq, se);
update_load_sub(&cfs_rq->load, se->load.weight);
}
dequeue_load_avg(cfs_rq, se);

if (!se->on_rq) {
if (se->on_rq) {
reweight_eevdf(se, avruntime, weight);
} else {
/*
* Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
* we need to scale se->vlag when w_i changes.
*/
se->vlag = div_s64(se->vlag * se->load.weight, weight);
} else {
reweight_eevdf(cfs_rq, se, weight);
}

update_load_set(&se->load, weight);
Expand Down
18 changes: 16 additions & 2 deletions kernel/sched/isolation.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,16 @@ int housekeeping_any_cpu(enum hk_type type)
if (cpu < nr_cpu_ids)
return cpu;

return cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask);
cpu = cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask);
if (likely(cpu < nr_cpu_ids))
return cpu;
/*
* Unless we have another problem this can only happen
* at boot time before start_secondary() brings the 1st
* housekeeping CPU up.
*/
WARN_ON_ONCE(system_state == SYSTEM_RUNNING ||
type != HK_TYPE_TIMER);
}
}
return smp_processor_id();
Expand Down Expand Up @@ -109,6 +118,7 @@ static void __init housekeeping_setup_type(enum hk_type type,
static int __init housekeeping_setup(char *str, unsigned long flags)
{
cpumask_var_t non_housekeeping_mask, housekeeping_staging;
unsigned int first_cpu;
int err = 0;

if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK)) {
Expand All @@ -129,7 +139,8 @@ static int __init housekeeping_setup(char *str, unsigned long flags)
cpumask_andnot(housekeeping_staging,
cpu_possible_mask, non_housekeeping_mask);

if (!cpumask_intersects(cpu_present_mask, housekeeping_staging)) {
first_cpu = cpumask_first_and(cpu_present_mask, housekeeping_staging);
if (first_cpu >= nr_cpu_ids || first_cpu >= setup_max_cpus) {
__cpumask_set_cpu(smp_processor_id(), housekeeping_staging);
__cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
if (!housekeeping.flags) {
Expand All @@ -138,6 +149,9 @@ static int __init housekeeping_setup(char *str, unsigned long flags)
}
}

if (cpumask_empty(non_housekeeping_mask))
goto free_housekeeping_staging;

if (!housekeeping.flags) {
/* First setup call ("nohz_full=" or "isolcpus=") */
enum hk_type type;
Expand Down

0 comments on commit 245c8e8

Please sign in to comment.