sched: Scale down cpu_power due to RT tasks
Keep an average of the amount of time spent on RT tasks and use
that fraction to scale down the cpu_power for regular tasks.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Tested-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Acked-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Acked-by: Gautham R Shenoy <ego@in.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
LKML-Reference: <20090901083826.287778431@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Peter Zijlstra authored and Ingo Molnar committed Sep 4, 2009
1 parent ab29230 commit e9e9250
Showing 4 changed files with 72 additions and 7 deletions.
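
The mechanism in brief: each runqueue accumulates RT runtime into rt_avg, periodically halves it so old activity decays, and the fraction of time not consumed by RT work scales cpu_power for regular tasks. Below is a minimal user-space sketch of that idea; the helper names and the simplified single-window arithmetic are illustrative only, not the kernel code in the diff.

#include <stdio.h>
#include <stdint.h>

#define SCHED_LOAD_SHIFT 10
#define SCHED_LOAD_SCALE (1ULL << SCHED_LOAD_SHIFT)
#define NSEC_PER_MSEC 1000000ULL

/* Half of the averaging window, mirroring sched_avg_period(). */
static uint64_t avg_period_ns(unsigned int time_avg_ms)
{
        return (uint64_t)time_avg_ms * NSEC_PER_MSEC / 2;
}

/* Decaying RT-time average: halve it once per fully elapsed period. */
static void age_rt_avg(uint64_t *rt_avg, uint64_t *age_stamp,
                       uint64_t now, uint64_t period)
{
        while (now - *age_stamp > period) {
                *age_stamp += period;
                *rt_avg /= 2;
        }
}

/* Fraction of cpu_power left for regular tasks, in SCHED_LOAD_SCALE units. */
static uint64_t rt_scale(uint64_t rt_avg, uint64_t window_ns)
{
        uint64_t total = window_ns;
        uint64_t available = total > rt_avg ? total - rt_avg : 0;

        if (total < SCHED_LOAD_SCALE)
                total = SCHED_LOAD_SCALE;

        return available / (total >> SCHED_LOAD_SHIFT);
}

int main(void)
{
        uint64_t rt_avg = 125 * NSEC_PER_MSEC;  /* 125 ms of recent RT time */
        uint64_t window = avg_period_ns(1000);  /* 500 ms, default sysctl */
        uint64_t age_stamp = 0;

        /* No full period has elapsed, so the decay loop leaves rt_avg alone. */
        age_rt_avg(&rt_avg, &age_stamp, window, window);

        /* Prints 768 / 1024: cpu_power drops to roughly 75% of nominal. */
        printf("scale = %llu / %llu\n",
               (unsigned long long)rt_scale(rt_avg, window),
               (unsigned long long)SCHED_LOAD_SCALE);
        return 0;
}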
1 change: 1 addition & 0 deletions include/linux/sched.h
@@ -1831,6 +1831,7 @@ extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_features;
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
extern unsigned int sysctl_sched_time_avg;
extern unsigned int sysctl_timer_migration;

int sched_nr_latency_handler(struct ctl_table *table, int write,
64 changes: 61 additions & 3 deletions kernel/sched.c
@@ -627,6 +627,9 @@ struct rq {

        struct task_struct *migration_thread;
        struct list_head migration_queue;

        u64 rt_avg;
        u64 age_stamp;
#endif

        /* calc_load related fields */
@@ -862,6 +865,14 @@ unsigned int sysctl_sched_shares_ratelimit = 250000;
*/
unsigned int sysctl_sched_shares_thresh = 4;

/*
* period over which we average the RT time consumption, measured
* in ms.
*
* default: 1s
*/
const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC;

/*
* period over which we measure -rt task cpu usage in us.
* default: 1s
@@ -1280,12 +1291,37 @@ void wake_up_idle_cpu(int cpu)
}
#endif /* CONFIG_NO_HZ */

static u64 sched_avg_period(void)
{
        return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
}

static void sched_avg_update(struct rq *rq)
{
        s64 period = sched_avg_period();

        while ((s64)(rq->clock - rq->age_stamp) > period) {
                rq->age_stamp += period;
                rq->rt_avg /= 2;
        }
}

static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
{
        rq->rt_avg += rt_delta;
        sched_avg_update(rq);
}

#else /* !CONFIG_SMP */
static void resched_task(struct task_struct *p)
{
        assert_spin_locked(&task_rq(p)->lock);
        set_tsk_need_resched(p);
}

static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
{
}
#endif /* CONFIG_SMP */
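
/*
 * Illustrative note (added for clarity): rt_avg accumulates RT runtime in
 * nanoseconds and age_stamp marks the start of the current averaging
 * window.  With the default sysctl_sched_time_avg of 1000 ms,
 * sched_avg_period() is 500 ms, so sched_avg_update() halves rt_avg for
 * every 500 ms of rq clock time that has passed.  A burst that contributed
 * 100 ms of RT runtime therefore weighs 100 ms now, ~50 ms after 500 ms,
 * ~25 ms after 1 s, decaying geometrically toward zero.
 */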

#if BITS_PER_LONG == 32
@@ -3699,7 +3735,7 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
}
#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */

-unsigned long __weak arch_smt_gain(struct sched_domain *sd, int cpu)
+unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
{
        unsigned long weight = cpumask_weight(sched_domain_span(sd));
        unsigned long smt_gain = sd->smt_gain;
@@ -3709,6 +3745,24 @@ unsigned long __weak arch_smt_gain(struct sched_domain *sd, int cpu)
        return smt_gain;
}

unsigned long scale_rt_power(int cpu)
{
        struct rq *rq = cpu_rq(cpu);
        u64 total, available;

        sched_avg_update(rq);

        total = sched_avg_period() + (rq->clock - rq->age_stamp);
        available = total - rq->rt_avg;

        if (unlikely((s64)total < SCHED_LOAD_SCALE))
                total = SCHED_LOAD_SCALE;

        total >>= SCHED_LOAD_SHIFT;

        return div_u64(available, total);
}
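
/*
 * Worked example (illustrative, assuming SCHED_LOAD_SCALE is 1024, i.e.
 * 1 << SCHED_LOAD_SHIFT): with total == 500 ms and rt_avg == 125 ms of
 * recent RT time, available/total is 0.75 and the function returns
 * roughly 768, so cpu_power is scaled down to about 75% of its nominal
 * value as seen by fair-class load balancing.
 */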

static void update_cpu_power(struct sched_domain *sd, int cpu)
{
        unsigned long weight = cpumask_weight(sched_domain_span(sd));
@@ -3719,11 +3773,15 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
        /* here we could scale based on cpufreq */

        if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
-                power *= arch_smt_gain(sd, cpu);
+                power *= arch_scale_smt_power(sd, cpu);
                power >>= SCHED_LOAD_SHIFT;
        }

        /* here we could scale based on RT time */
        power *= scale_rt_power(cpu);
        power >>= SCHED_LOAD_SHIFT;

        if (!power)
                power = 1;

        if (power != old) {
                sdg->__cpu_power = power;
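
In this hunk, update_cpu_power() applies the two reductions in sequence: first the SMT gain, then the RT fraction, each followed by a shift back into SCHED_LOAD_SCALE units. A standalone sketch of that composition follows; it assumes SCHED_LOAD_SCALE is 1024, and the concrete numbers are made-up example values, not kernel defaults guaranteed by this patch.

#include <stdio.h>

#define SCHED_LOAD_SHIFT 10
#define SCHED_LOAD_SCALE (1UL << SCHED_LOAD_SHIFT)

int main(void)
{
        unsigned long power = SCHED_LOAD_SCALE; /* nominal: 1024 */
        unsigned long smt_power = 589;          /* e.g. ~1178 gain shared by 2 threads */
        unsigned long rt_power = 768;           /* ~75% left after RT time (see above) */

        power *= smt_power;
        power >>= SCHED_LOAD_SHIFT;             /* ~589 */

        power *= rt_power;
        power >>= SCHED_LOAD_SHIFT;             /* ~441 */

        if (!power)
                power = 1;

        printf("effective cpu_power: %lu of %lu\n", power, SCHED_LOAD_SCALE);
        return 0;
}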
6 changes: 2 additions & 4 deletions kernel/sched_rt.c
@@ -615,6 +615,8 @@ static void update_curr_rt(struct rq *rq)
        curr->se.exec_start = rq->clock;
        cpuacct_charge(curr, delta_exec);

        sched_rt_avg_update(rq, delta_exec);

        if (!rt_bandwidth_enabled())
                return;

@@ -887,8 +889,6 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)

        if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
                enqueue_pushable_task(rq, p);
-
-        inc_cpu_load(rq, p->se.load.weight);
}

static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
@@ -899,8 +899,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
        dequeue_rt_entity(rt_se);

        dequeue_pushable_task(rq, p);
-
-        dec_cpu_load(rq, p->se.load.weight);
}
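
/*
 * Illustrative note: update_curr_rt() now feeds each nanosecond of RT
 * runtime (delta_exec) into sched_rt_avg_update(), while the explicit
 * inc_cpu_load()/dec_cpu_load() calls are dropped -- presumably because
 * RT pressure is now reflected through the scaled cpu_power rather than
 * through the runqueue load figures.
 */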

/*
8 changes: 8 additions & 0 deletions kernel/sysctl.c
@@ -330,6 +330,14 @@ static struct ctl_table kern_table[] = {
                .mode = 0644,
                .proc_handler = &proc_dointvec,
        },
        {
                .ctl_name = CTL_UNNUMBERED,
                .procname = "sched_time_avg",
                .data = &sysctl_sched_time_avg,
                .maxlen = sizeof(unsigned int),
                .mode = 0644,
                .proc_handler = &proc_dointvec,
        },
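        /*
         * Usage note (illustrative): since this entry lives in kern_table,
         * the knob shows up as /proc/sys/kernel/sched_time_avg, in
         * milliseconds with a default of 1000 (MSEC_PER_SEC), and can be
         * tuned at runtime, e.g. "echo 500 > /proc/sys/kernel/sched_time_avg".
         */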
        {
                .ctl_name = CTL_UNNUMBERED,
                .procname = "timer_migration",
