diff options
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r-- | kernel/sched/fair.c | 54 |
1 files changed, 51 insertions, 3 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ac7fb39c3..5518a9621 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2881,7 +2881,8 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg) { struct cfs_rq *cfs_rq = cfs_rq_of(se); u64 now = cfs_rq_clock_task(cfs_rq); - int cpu = cpu_of(rq_of(cfs_rq)); + struct rq *rq = rq_of(cfs_rq); + int cpu = cpu_of(rq); /* * Track task load average for carrying it to new CPU after migrated, and @@ -2893,6 +2894,29 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg) if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg) update_tg_load_avg(cfs_rq, 0); + + if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) { + unsigned long max = rq->cpu_capacity_orig; + + /* + * There are a few boundary cases this might miss but it should + * get called often enough that that should (hopefully) not be + * a real problem -- added to that it only calls on the local + * CPU, so if we enqueue remotely we'll miss an update, but + * the next tick/schedule should update. + * + * It will not get called when we go idle, because the idle + * thread is a different class (!fair), nor will the utilization + * number include things like RT tasks. + * + * As is, the util number is not freq-invariant (we'd have to + * implement arch_scale_freq_capacity() for that). + * + * See cpu_util(). + */ + cpufreq_update_util(rq_clock(rq), + min(cfs_rq->avg.util_avg, max), max); + } } static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -3031,7 +3055,14 @@ static int idle_balance(struct rq *this_rq); #else /* CONFIG_SMP */ -static inline void update_load_avg(struct sched_entity *se, int update_tg) {} +static inline void update_load_avg(struct sched_entity *se, int not_used) +{ + struct cfs_rq *cfs_rq = cfs_rq_of(se); + struct rq *rq = rq_of(cfs_rq); + + cpufreq_trigger_update(rq_clock(rq)); +} + static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {} static inline void @@ -5072,7 +5103,19 @@ static int select_idle_sibling(struct task_struct *p, int target) return i; /* - * Otherwise, iterate the domains and find an elegible idle cpu. + * Otherwise, iterate the domains and find an eligible idle cpu. + * + * A completely idle sched group at higher domains is more + * desirable than an idle group at a lower level, because lower + * domains have smaller groups and usually share hardware + * resources which causes tasks to contend on them, e.g. x86 + * hyperthread siblings in the lowest domain (SMT) can contend + * on the shared cpu pipeline. + * + * However, while we prefer idle groups at higher domains + * finding an idle cpu at the lowest domain is still better than + * returning 'target', which we've already established, isn't + * idle. */ sd = rcu_dereference(per_cpu(sd_llc, target)); for_each_lower_domain(sd) { @@ -5082,11 +5125,16 @@ static int select_idle_sibling(struct task_struct *p, int target) tsk_cpus_allowed(p))) goto next; + /* Ensure the entire group is idle */ for_each_cpu(i, sched_group_cpus(sg)) { if (i == target || !idle_cpu(i)) goto next; } + /* + * It doesn't matter which cpu we pick, the + * whole group is idle. + */ target = cpumask_first_and(sched_group_cpus(sg), tsk_cpus_allowed(p)); goto done; |