Diffstat (limited to 'kernel/sched/bfs.c')
-rw-r--r--  kernel/sched/bfs.c  | 500
1 file changed, 229 insertions(+), 271 deletions(-)
diff --git a/kernel/sched/bfs.c b/kernel/sched/bfs.c
index 67f93e752..bb5bac4b2 100644
--- a/kernel/sched/bfs.c
+++ b/kernel/sched/bfs.c
@@ -24,7 +24,7 @@
* 2007-07-01 Group scheduling enhancements by Srivatsa Vaddagiri
* 2007-11-29 RT balancing improvements by Steven Rostedt, Gregory Haskins,
* Thomas Gleixner, Mike Kravetz
- * now Brainfuck deadline scheduling policy by Con Kolivas deletes
+ * 2009-08-13 Brainfuck deadline scheduling policy by Con Kolivas deletes
* a whole lot of those previous things.
*/
@@ -137,7 +137,7 @@
void print_scheduler_version(void)
{
- printk(KERN_INFO "BFS CPU scheduler v0.502 by Con Kolivas.\n");
+ printk(KERN_INFO "BFS CPU scheduler v0.512 by Con Kolivas.\n");
}
/*
@@ -403,7 +403,6 @@ static inline void grq_lock_irq(void)
}
static inline void time_lock_grq(struct rq *rq)
- __acquires(grq.lock)
{
grq_lock();
update_clocks(rq);
@@ -429,86 +428,35 @@ static inline void grq_unlock_irqrestore(unsigned long *flags)
static inline struct rq
*task_grq_lock(struct task_struct *p, unsigned long *flags)
- __acquires(grq.lock)
+ __acquires(p->pi_lock)
{
- grq_lock_irqsave(flags);
+ raw_spin_lock_irqsave(&p->pi_lock, *flags);
+ grq_lock();
return task_rq(p);
}
static inline struct rq
*time_task_grq_lock(struct task_struct *p, unsigned long *flags)
- __acquires(grq.lock)
{
struct rq *rq = task_grq_lock(p, flags);
- update_clocks(rq);
- return rq;
-}
-static inline struct rq *task_grq_lock_irq(struct task_struct *p)
- __acquires(grq.lock)
-{
- grq_lock_irq();
- return task_rq(p);
-}
-
-static inline void time_task_grq_lock_irq(struct task_struct *p)
- __acquires(grq.lock)
-{
- struct rq *rq = task_grq_lock_irq(p);
update_clocks(rq);
+ return rq;
}
-static inline void task_grq_unlock_irq(void)
- __releases(grq.lock)
-{
- grq_unlock_irq();
-}
-
-static inline void task_grq_unlock(unsigned long *flags)
- __releases(grq.lock)
-{
- grq_unlock_irqrestore(flags);
-}
-
-/**
- * grunqueue_is_locked
- *
- * Returns true if the global runqueue is locked.
- * This interface allows printk to be called with the runqueue lock
- * held and know whether or not it is OK to wake up the klogd.
- */
-bool grunqueue_is_locked(void)
-{
- return raw_spin_is_locked(&grq.lock);
-}
-
-void grq_unlock_wait(void)
- __releases(grq.lock)
+static inline void task_grq_unlock(struct task_struct *p, unsigned long *flags)
+ __releases(p->pi_lock)
{
- smp_mb(); /* spin-unlock-wait is not a full memory barrier */
- raw_spin_unlock_wait(&grq.lock);
+ grq_unlock();
+ raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
}
static inline void time_grq_lock(struct rq *rq, unsigned long *flags)
- __acquires(grq.lock)
{
local_irq_save(*flags);
time_lock_grq(rq);
}
-static inline struct rq *__task_grq_lock(struct task_struct *p)
- __acquires(grq.lock)
-{
- grq_lock();
- return task_rq(p);
-}
-
-static inline void __task_grq_unlock(void)
- __releases(grq.lock)
-{
- grq_unlock();
-}
-
static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
{
}
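Editorial note, not part of the patch: the reworked helpers now nest the global runqueue lock inside the task's pi_lock, matching the lockdep assertion added to set_task_cpu() further down. A minimal sketch of the resulting caller pattern; example_adjust_task() is hypothetical, only the two helpers come from the patch:

/* Hypothetical caller showing the new lock nesting. */
static void example_adjust_task(struct task_struct *p)
{
	unsigned long flags;
	struct rq *rq;

	rq = task_grq_lock(p, &flags);	/* p->pi_lock first, then grq.lock, IRQs off */
	/* ... task_cpu(p) and p's scheduling state are stable while both are held ... */
	task_grq_unlock(p, &flags);	/* releases grq.lock, then p->pi_lock */
}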
@@ -540,6 +488,40 @@ static inline bool deadline_after(u64 deadline, u64 time)
}
/*
+ * Deadline is "now" in niffies + (offset by priority). Setting the deadline
+ * is the key to everything. It distributes cpu fairly amongst tasks of the
+ * same nice value, it proportions cpu according to nice level, it means the
+ * task that last woke up the longest ago has the earliest deadline, thus
+ * ensuring that interactive tasks get low latency on wake up. The CPU
+ * proportion works out to the square of the virtual deadline difference, so
+ * this equation will give nice 19 3% CPU compared to nice 0.
+ */
+static inline u64 prio_deadline_diff(int user_prio)
+{
+ return (prio_ratios[user_prio] * rr_interval * (MS_TO_NS(1) / 128));
+}
+
+static inline u64 task_deadline_diff(struct task_struct *p)
+{
+ return prio_deadline_diff(TASK_USER_PRIO(p));
+}
+
+static inline u64 static_deadline_diff(int static_prio)
+{
+ return prio_deadline_diff(USER_PRIO(static_prio));
+}
+
+static inline int longest_deadline_diff(void)
+{
+ return prio_deadline_diff(39);
+}
+
+static inline int ms_longest_deadline_diff(void)
+{
+ return NS_TO_MS(longest_deadline_diff());
+}
+
+/*
* A task that is not running or queued will not have a node set.
* A task that is queued but not running will have a node set.
* A task that is currently running will have ->on_cpu set but no node set.
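Editorial note: the "3%" figure in the relocated comment can be checked by hand. Assuming the usual BFS prio_ratios progression (roughly +10% per nice level), nice 19's deadline offset is about 1.1^19 ≈ 6.1 times nice 0's, and with the CPU proportion going as the square of the virtual deadline difference the split is roughly 1:37. A throwaway userspace check, not part of the patch:

#include <stdio.h>

int main(void)
{
	double ratio = 1.0;
	int nice;

	/* deadline offset grows ~10% per nice level (assumed prio_ratios step) */
	for (nice = 0; nice < 19; nice++)
		ratio *= 1.1;

	/* CPU share scales with the square of the virtual deadline difference */
	printf("deadline ratio %.1f, nice 19 gets ~%.1f%% of nice 0\n",
	       ratio, 100.0 / (ratio * ratio));
	return 0;
}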
@@ -561,14 +543,23 @@ static void dequeue_task(struct task_struct *p)
sched_info_dequeued(task_rq(p), p);
}
+#ifdef CONFIG_PREEMPT_RCU
+static bool rcu_read_critical(struct task_struct *p)
+{
+ return p->rcu_read_unlock_special.b.blocked;
+}
+#else /* CONFIG_PREEMPT_RCU */
+#define rcu_read_critical(p) (false)
+#endif /* CONFIG_PREEMPT_RCU */
+
/*
* To determine if it's safe for a task of SCHED_IDLEPRIO to actually run as
* an idle task, we ensure none of the following conditions are met.
*/
static bool idleprio_suitable(struct task_struct *p)
{
- return (!freezing(p) && !signal_pending(p) &&
- !(task_contributes_to_load(p)) && !(p->flags & (PF_EXITING)));
+ return (!(task_contributes_to_load(p)) && !(p->flags & (PF_EXITING)) &&
+ !signal_pending(p) && !rcu_read_critical(p) && !freezing(p));
}
/*
@@ -612,9 +603,13 @@ static void enqueue_task(struct task_struct *p, struct rq *rq)
sl_id = p->prio;
else {
sl_id = p->deadline;
- /* Set it to cope with 4 left shifts with locality_diff */
- if (p->prio == IDLE_PRIO)
- sl_id |= 0x0F00000000000000;
+ if (idleprio_task(p)) {
+ /* Set it to cope with 4 left shifts with locality_diff */
+ if (p->prio == IDLE_PRIO)
+ sl_id |= 0x00FF000000000000;
+ else
+ sl_id += longest_deadline_diff();
+ }
}
/*
* Some architectures don't have better than microsecond resolution
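Editorial note: the new branch can be read in isolation as follows. This is only an annotated restatement of the hunk above; the surrounding enqueue_task() context (RT/ISO tasks already keyed by priority) is assumed, and the helper name is hypothetical:

/* Illustrative restatement of the new SCHED_IDLEPRIO key handling. */
static u64 example_idleprio_key(struct task_struct *p, u64 sl_id)
{
	if (idleprio_task(p)) {
		if (p->prio == IDLE_PRIO)
			/* Truly idle: force the key above anything a deadline
			 * plus up to 4 locality_diff left shifts can reach. */
			sl_id |= 0x00FF000000000000;
		else
			/* Idleprio task currently not at IDLE_PRIO (e.g. because
			 * idleprio_suitable() failed): sort it after every
			 * normal task's deadline instead. */
			sl_id += longest_deadline_diff();
	}
	return sl_id;
}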
@@ -1008,15 +1003,18 @@ static inline void deactivate_task(struct task_struct *p, struct rq *rq)
#ifdef CONFIG_SMP
void set_task_cpu(struct task_struct *p, unsigned int cpu)
{
- unsigned int tcpu;
-
#ifdef CONFIG_LOCKDEP
/*
- * The caller should hold grq lock.
+ * The caller should hold either p->pi_lock or grq lock, when changing
+ * a task's CPU. ->pi_lock for waking tasks, grq lock for runnable tasks.
+ *
+ * Furthermore, all task_rq users should acquire both locks, see
+ * task_grq_lock().
*/
- WARN_ON_ONCE(debug_locks && !lockdep_is_held(&grq.lock));
+ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
+ lockdep_is_held(&grq.lock)));
#endif
- if ((tcpu = task_cpu(p)) == cpu)
+ if (task_cpu(p) == cpu)
return;
trace_sched_migrate_task(p, cpu);
perf_event_task_migrate(p);
@@ -1027,6 +1025,7 @@ void set_task_cpu(struct task_struct *p, unsigned int cpu)
* per-task data have been completed by this moment.
*/
smp_wmb();
+
if (p->on_rq) {
struct rq *rq = task_rq(p);
@@ -1166,7 +1165,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
ncsw = 0;
if (!match_state || p->state == match_state)
ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
- task_grq_unlock(&flags);
+ task_grq_unlock(p, &flags);
/*
* If it changed from the expected state, bail out now.
@@ -1292,9 +1291,7 @@ static inline bool needs_other_cpu(struct task_struct *p, int cpu)
static void try_preempt(struct task_struct *p, struct rq *this_rq)
{
- int cpu, pcpu, highest_prio, highest_cpu;
- struct rq *highest_prio_rq;
- u64 latest_deadline;
+ int i, this_entries = this_rq->soft_affined;
cpumask_t tmp;
if (suitable_idle_cpus(p) && resched_best_idle(p))
@@ -1306,56 +1303,32 @@ static void try_preempt(struct task_struct *p, struct rq *this_rq)
cpumask_and(&tmp, &cpu_online_map, &p->cpus_allowed);
- /* See if this task can preempt the task on the current CPU first. */
- pcpu = cpu_of(this_rq);
- if (likely(cpumask_test_cpu(pcpu, &tmp))) {
- if (smt_schedule(p, this_rq) && can_preempt(p, this_rq->rq_prio, this_rq->rq_deadline)) {
- resched_curr(this_rq);
- return;
- }
- cpumask_clear_cpu(pcpu, &tmp);
- }
-
- highest_prio = latest_deadline = 0;
- highest_prio_rq = NULL;
-
- /* Now look for the CPU with the latest deadline */
- for_each_cpu(cpu, &tmp) {
- struct rq *rq;
- int rq_prio;
- u64 dl;
+ /*
+ * We iterate over CPUs in locality order using rq_order, finding the
+ * first one we can preempt if possible, thus staying closest in
+ * locality.
+ */
+ for (i = 0; i < num_possible_cpus(); i++) {
+ struct rq *rq = this_rq->rq_order[i];
- rq = cpu_rq(cpu);
- rq_prio = rq->rq_prio;
- if (rq_prio < highest_prio)
+ if (!cpumask_test_cpu(rq->cpu, &tmp))
continue;
- dl = rq->rq_deadline;
- if (!sched_interactive && pcpu != cpu)
- dl <<= locality_diff(pcpu, rq);
- if (rq_prio > highest_prio ||
- deadline_after(dl, latest_deadline)) {
- latest_deadline = dl;
- highest_prio = rq_prio;
- highest_cpu = cpu;
- highest_prio_rq = rq;
+ if (!sched_interactive && rq != this_rq && rq->soft_affined <= this_entries)
+ continue;
+ if (smt_schedule(p, rq) && can_preempt(p, rq->rq_prio, rq->rq_deadline)) {
+ /*
+ * If we have decided this task should preempt this CPU,
+ * set the task's CPU to match thereby speeding up matching
+ * this task in earliest_deadline_task.
+ */
+ set_task_cpu(p, rq->cpu);
+ resched_curr(rq);
+ return;
}
}
-
- if (unlikely(!highest_prio_rq))
- return;
- if (!smt_schedule(p, highest_prio_rq))
- return;
- if (can_preempt(p, highest_prio, latest_deadline)) {
- /*
- * If we have decided this task should preempt this CPU,
- * set the task's CPU to match thereby speeding up matching
- * this task in earliest_deadline_task.
- */
- set_task_cpu(p, highest_cpu);
- resched_curr(highest_prio_rq);
- }
}
+
static int __set_cpus_allowed_ptr(struct task_struct *p,
const struct cpumask *new_mask, bool check);
#else /* CONFIG_SMP */
@@ -1501,8 +1474,6 @@ static bool try_to_wake_up(struct task_struct *p, unsigned int state,
struct rq *rq;
int cpu;
- get_cpu();
-
/*
* If we are going to wake up a thread waiting for CONDITION we
* need to ensure that CONDITION=1 done by the caller can not be
@@ -1533,13 +1504,11 @@ static bool try_to_wake_up(struct task_struct *p, unsigned int state,
out_running:
ttwu_post_activation(p, rq, success);
out_unlock:
- task_grq_unlock(&flags);
+ task_grq_unlock(p, &flags);
if (schedstat_enabled())
ttwu_stat(p, cpu, wake_flags);
- put_cpu();
-
return success;
}
@@ -1629,6 +1598,13 @@ int sched_fork(unsigned long __maybe_unused clone_flags, struct task_struct *p)
skiplist_node_init(&p->node);
/*
+ * We mark the process as NEW here. This guarantees that
+ * nobody will actually run it, and a signal or other external
+ * event cannot wake it up and insert it on the runqueue either.
+ */
+ p->state = TASK_NEW;
+
+ /*
* Revert to default priority/policy on fork if requested.
*/
if (unlikely(p->sched_reset_on_fork)) {
@@ -1744,12 +1720,16 @@ static inline void init_schedstats(void) {}
*/
void wake_up_new_task(struct task_struct *p)
{
- struct task_struct *parent;
+ struct task_struct *parent, *rq_curr;
+ struct rq *rq, *new_rq;
unsigned long flags;
- struct rq *rq;
parent = p->parent;
rq = task_grq_lock(p, &flags);
+ if (unlikely(needs_other_cpu(p, task_cpu(p))))
+ set_task_cpu(p, cpumask_any(tsk_cpus_allowed(p)));
+ rq_curr = rq->curr;
+ p->state = TASK_RUNNING;
/*
* Reinit new task deadline as its creator deadline could have changed
@@ -1757,22 +1737,20 @@ void wake_up_new_task(struct task_struct *p)
*/
p->deadline = rq->rq_deadline;
- /*
- * If the task is a new process, current and parent are the same. If
- * the task is a new thread in the thread group, it will have much more
- * in common with current than with the parent.
- */
- set_task_cpu(p, task_cpu(rq->curr));
+ /* The new task might not be able to run on the same CPU as rq->curr */
+ if (unlikely(needs_other_cpu(p, task_cpu(p)))) {
+ set_task_cpu(p, cpumask_any(tsk_cpus_allowed(p)));
+ new_rq = task_rq(p);
+ } else
+ new_rq = rq;
/*
* Make sure we do not leak PI boosting priority to the child.
*/
- p->prio = rq->curr->normal_prio;
+ p->prio = rq_curr->normal_prio;
activate_task(p, rq);
trace_sched_wakeup_new(p);
- if (unlikely(p->policy == SCHED_FIFO))
- goto after_ts_init;
/*
* Share the timeslice between parent and child, thus the
@@ -1784,33 +1762,39 @@ void wake_up_new_task(struct task_struct *p)
* is always equal to current->deadline.
*/
p->last_ran = rq->rq_last_ran;
- if (likely(rq->rq_time_slice >= RESCHED_US * 2)) {
+ if (likely(rq_curr->policy != SCHED_FIFO)) {
rq->rq_time_slice /= 2;
- p->time_slice = rq->rq_time_slice;
-after_ts_init:
- if (rq->curr == parent && !suitable_idle_cpus(p)) {
+ if (unlikely(rq->rq_time_slice < RESCHED_US)) {
/*
- * The VM isn't cloned, so we're in a good position to
- * do child-runs-first in anticipation of an exec. This
- * usually avoids a lot of COW overhead.
+ * Forking task has run out of timeslice. Reschedule it and
+ * start its child with a new time slice and deadline. The
+ * child will end up running first because its deadline will
+ * be slightly earlier.
*/
- __set_tsk_resched(parent);
- } else
- try_preempt(p, rq);
- } else {
- if (rq->curr == parent) {
- /*
- * Forking task has run out of timeslice. Reschedule it and
- * start its child with a new time slice and deadline. The
- * child will end up running first because its deadline will
- * be slightly earlier.
- */
rq->rq_time_slice = 0;
- __set_tsk_resched(parent);
+ __set_tsk_resched(rq_curr);
+ time_slice_expired(p);
+ if (suitable_idle_cpus(p))
+ resched_best_idle(p);
+ else if (unlikely(rq != new_rq))
+ try_preempt(p, new_rq);
+ } else {
+ p->time_slice = rq->rq_time_slice;
+ if (rq_curr == parent && rq == new_rq && !suitable_idle_cpus(p)) {
+ /*
+ * The VM isn't cloned, so we're in a good position to
+ * do child-runs-first in anticipation of an exec. This
+ * usually avoids a lot of COW overhead.
+ */
+ __set_tsk_resched(rq_curr);
+ } else
+ try_preempt(p, new_rq);
}
+ } else {
time_slice_expired(p);
+ try_preempt(p, new_rq);
}
- task_grq_unlock(&flags);
+ task_grq_unlock(p, &flags);
}
#ifdef CONFIG_PREEMPT_NOTIFIERS
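Editorial note: the reworked fork path (for non-SCHED_FIFO parents) halves the current task's remaining slice and either hands the other half to the child or, if the remainder is nearly gone, reschedules the parent and gives the child a fresh slice and deadline. A standalone sketch of just that decision, with the RESCHED_US value assumed for illustration:

#include <stdio.h>

#define RESCHED_US 100	/* assumed threshold, for illustration only */

static void fork_split(int curr_slice_us)
{
	int parent, child;

	parent = curr_slice_us / 2;		/* current task keeps one half */
	if (parent < RESCHED_US) {
		parent = 0;			/* nearly expired: resched parent */
		child = -1;			/* child gets a fresh slice+deadline */
	} else
		child = parent;			/* child takes the other half */

	if (child < 0)
		printf("parent %dus (resched), child gets a new slice and deadline\n",
		       parent);
	else
		printf("parent %dus, child %dus (child may run first)\n",
		       parent, child);
}

int main(void)
{
	fork_split(6000);	/* plenty left: split 3000/3000 */
	fork_split(150);	/* nearly out: parent resched, child starts fresh */
	return 0;
}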
@@ -2724,7 +2708,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
rq = task_grq_lock(p, &flags);
ns = p->sched_time + do_task_delta_exec(p, rq);
- task_grq_unlock(&flags);
+ task_grq_unlock(p, &flags);
return ns;
}
@@ -2978,7 +2962,7 @@ static void task_running_tick(struct rq *rq)
grq_lock();
requeue_task(p);
- __set_tsk_resched(p);
+ resched_task(p);
grq_unlock();
}
@@ -3083,40 +3067,6 @@ static inline void preempt_latency_stop(int val) { }
#endif
/*
- * Deadline is "now" in niffies + (offset by priority). Setting the deadline
- * is the key to everything. It distributes cpu fairly amongst tasks of the
- * same nice value, it proportions cpu according to nice level, it means the
- * task that last woke up the longest ago has the earliest deadline, thus
- * ensuring that interactive tasks get low latency on wake up. The CPU
- * proportion works out to the square of the virtual deadline difference, so
- * this equation will give nice 19 3% CPU compared to nice 0.
- */
-static inline u64 prio_deadline_diff(int user_prio)
-{
- return (prio_ratios[user_prio] * rr_interval * (MS_TO_NS(1) / 128));
-}
-
-static inline u64 task_deadline_diff(struct task_struct *p)
-{
- return prio_deadline_diff(TASK_USER_PRIO(p));
-}
-
-static inline u64 static_deadline_diff(int static_prio)
-{
- return prio_deadline_diff(USER_PRIO(static_prio));
-}
-
-static inline int longest_deadline_diff(void)
-{
- return prio_deadline_diff(39);
-}
-
-static inline int ms_longest_deadline_diff(void)
-{
- return NS_TO_MS(longest_deadline_diff());
-}
-
-/*
* The time_slice is only refilled when it is empty and that is when we set a
* new deadline.
*/
@@ -3215,13 +3165,12 @@ found_middle:
static inline struct
task_struct *earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle)
{
- struct task_struct *edt = idle;
skiplist_node *node = &grq.node;
+ struct task_struct *edt = idle;
u64 earliest_deadline = ~0ULL;
while ((node = node->next[0]) != &grq.node) {
struct task_struct *p = node->value;
- int tcpu;
/* Make sure affinity is ok */
if (needs_other_cpu(p, cpu))
@@ -3230,22 +3179,24 @@ task_struct *earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *
if (!smt_schedule(p, rq))
continue;
- if (!sched_interactive && (tcpu = task_cpu(p)) != cpu) {
- u64 dl = p->deadline << locality_diff(tcpu, rq);
+ if (!sched_interactive) {
+ int tcpu;
+
+ if ((tcpu = task_cpu(p)) != cpu) {
+ u64 dl = p->deadline << locality_diff(tcpu, rq);
- if (unlikely(!deadline_before(dl, earliest_deadline)))
+ if (!deadline_before(dl, earliest_deadline))
+ continue;
+ earliest_deadline = dl;
+ edt = p;
+ /* We continue even though we've found the earliest
+ * deadline task as the locality offset means there
+ * may be a better candidate after it. */
continue;
- earliest_deadline = dl;
- edt = p;
- /* We continue even though we've found the earliest
- * deadline task as the locality offset means there
- * may be a better candidate after it. */
- continue;
+ }
}
- /* This wouldn't happen if we encountered a better deadline from
- * another CPU and have already set edt. */
- if (likely(p->deadline < earliest_deadline))
- edt = p;
+ /* We've encountered the best deadline local task */
+ edt = p;
break;
}
if (likely(edt != idle))
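Editorial note: the rewritten loop can stop at the first task whose CPU matches because the skiplist is already deadline-ordered, so the first local candidate is the best local one; remote candidates only beat it if their deadline survives the locality shift. A standalone illustration (not kernel code, locality value assumed):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t local_dl = 1000;	/* first task queued for this CPU */
	uint64_t remote_dl = 400;	/* earlier raw deadline, but on another CPU */
	int locality_diff = 2;		/* assumed: same node, different cache */

	/* with !sched_interactive, remote deadlines are left-shifted by locality */
	uint64_t remote_key = remote_dl << locality_diff;	/* 400 -> 1600 */

	printf("remote key %llu vs local %llu: %s wins\n",
	       (unsigned long long)remote_key, (unsigned long long)local_dl,
	       remote_key < local_dl ? "remote" : "local");
	return 0;
}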
@@ -3275,6 +3226,9 @@ static noinline void __schedule_bug(struct task_struct *prev)
pr_cont("\n");
}
#endif
+ if (panic_on_warn)
+ panic("scheduling while atomic\n");
+
dump_stack();
add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
}
@@ -3316,10 +3270,6 @@ static inline void set_rq_task(struct rq *rq, struct task_struct *p)
rq->rq_mm = p->mm;
rq->rq_smt_bias = p->smt_bias;
#endif
- if (p != rq->idle)
- rq->rq_running = true;
- else
- rq->rq_running = false;
}
static void reset_rq_task(struct rq *rq, struct task_struct *p)
@@ -3353,7 +3303,7 @@ static void check_smt_siblings(struct rq *this_rq)
if (unlikely(!rq->online))
continue;
p = rq->curr;
- if (!smt_should_schedule(p, this_rq)) {
+ if (!smt_schedule(p, this_rq)) {
set_tsk_need_resched(p);
smp_send_reschedule(other_cpu);
}
@@ -3546,8 +3496,6 @@ static void __sched notrace __schedule(bool preempt)
trace_sched_switch(preempt, prev, next);
rq = context_switch(rq, prev, next); /* unlocks the grq */
- cpu = cpu_of(rq);
- idle = rq->idle;
} else {
check_siblings(rq);
grq_unlock_irq();
@@ -3766,8 +3714,8 @@ EXPORT_SYMBOL(default_wake_function);
void rt_mutex_setprio(struct task_struct *p, int prio)
{
unsigned long flags;
- int queued, oldprio;
struct rq *rq;
+ int oldprio;
BUG_ON(prio < 0 || prio > MAX_PRIO);
@@ -3793,19 +3741,18 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
trace_sched_pi_setprio(p, prio);
oldprio = p->prio;
- queued = task_queued(p);
- if (queued)
- dequeue_task(p);
p->prio = prio;
- if (task_running(p) && prio > oldprio)
- resched_task(p);
- if (queued) {
+ if (task_running(p)){
+ if (prio > oldprio)
+ resched_task(p);
+ } else if (task_queued(p)) {
+ dequeue_task(p);
enqueue_task(p, rq);
- try_preempt(p, rq);
+ if (prio < oldprio)
+ try_preempt(p, rq);
}
-
out_unlock:
- task_grq_unlock(&flags);
+ task_grq_unlock(p, &flags);
}
#endif
@@ -3821,7 +3768,7 @@ static inline void adjust_deadline(struct task_struct *p, int new_prio)
void set_user_nice(struct task_struct *p, long nice)
{
- int queued, new_static, old_static;
+ int new_static, old_static;
unsigned long flags;
struct rq *rq;
@@ -3843,16 +3790,14 @@ void set_user_nice(struct task_struct *p, long nice)
p->static_prio = new_static;
goto out_unlock;
}
- queued = task_queued(p);
- if (queued)
- dequeue_task(p);
adjust_deadline(p, new_static);
old_static = p->static_prio;
p->static_prio = new_static;
p->prio = effective_prio(p);
- if (queued) {
+ if (task_queued(p)) {
+ dequeue_task(p);
enqueue_task(p, rq);
if (new_static < old_static)
try_preempt(p, rq);
@@ -3862,7 +3807,7 @@ void set_user_nice(struct task_struct *p, long nice)
resched_task(p);
}
out_unlock:
- task_grq_unlock(&flags);
+ task_grq_unlock(p, &flags);
}
EXPORT_SYMBOL(set_user_nice);
@@ -4002,11 +3947,15 @@ static void __setscheduler(struct task_struct *p, struct rq *rq, int policy,
p->prio = rt_mutex_get_effective_prio(p, p->normal_prio);
} else
p->prio = p->normal_prio;
+
if (task_running(p)) {
reset_rq_task(rq, p);
- /* Resched only if we might now be preempted */
- if (p->prio > oldprio || p->rt_priority > oldrtprio)
- resched_task(p);
+ resched_task(p);
+ } else if (task_queued(p)) {
+ dequeue_task(p);
+ enqueue_task(p, rq);
+ if (p->prio < oldprio || p->rt_priority > oldrtprio)
+ try_preempt(p, rq);
}
}
@@ -4031,8 +3980,8 @@ __sched_setscheduler(struct task_struct *p, int policy,
const struct sched_param *param, bool user, bool pi)
{
struct sched_param zero_param = { .sched_priority = 0 };
- int queued, retval, oldpolicy = -1;
unsigned long flags, rlim_rtprio = 0;
+ int retval, oldpolicy = -1;
int reset_on_fork;
struct rq *rq;
@@ -4142,20 +4091,17 @@ recheck:
/*
* make sure no PI-waiters arrive (or leave) while we are
* changing the priority of the task:
- */
- raw_spin_lock_irqsave(&p->pi_lock, flags);
- /*
+ *
* To be able to change p->policy safely, the grunqueue lock must be
* held.
*/
- rq = __task_grq_lock(p);
+ rq = task_grq_lock(p, &flags);
/*
* Changing the policy of the stop threads its a very bad idea
*/
if (p == rq->stop) {
- __task_grq_unlock();
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+ task_grq_unlock(p, &flags);
return -EINVAL;
}
@@ -4165,31 +4111,21 @@ recheck:
if (unlikely(policy == p->policy && (!is_rt_policy(policy) ||
param->sched_priority == p->rt_priority))) {
- __task_grq_unlock();
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+ task_grq_unlock(p, &flags);
return 0;
}
/* recheck policy now with rq lock held */
if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
policy = oldpolicy = -1;
- __task_grq_unlock();
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+ task_grq_unlock(p, &flags);
goto recheck;
}
update_clocks(rq);
p->sched_reset_on_fork = reset_on_fork;
- queued = task_queued(p);
- if (queued)
- dequeue_task(p);
__setscheduler(p, rq, policy, param->sched_priority, pi);
- if (queued) {
- enqueue_task(p, rq);
- try_preempt(p, rq);
- }
- __task_grq_unlock();
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+ task_grq_unlock(p, &flags);
if (pi)
rt_mutex_adjust_pi(p);
@@ -4706,7 +4642,8 @@ out_unlock:
* @len: length in bytes of the bitmask pointed to by user_mask_ptr
* @user_mask_ptr: user-space pointer to hold the current cpu mask
*
- * Return: 0 on success. An error code otherwise.
+ * Return: size of CPU mask copied to user_mask_ptr on success. An
+ * error code otherwise.
*/
SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
unsigned long __user *, user_mask_ptr)
@@ -5113,6 +5050,8 @@ void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_ma
void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
cpumask_copy(tsk_cpus_allowed(p), new_mask);
+ if (needs_other_cpu(p, task_cpu(p)))
+ set_task_cpu(p, cpumask_any(tsk_cpus_allowed(p)));
}
#endif
@@ -5376,6 +5315,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
{
const struct cpumask *cpu_valid_mask = cpu_active_mask;
bool running_wrong = false;
+ struct cpumask old_mask;
bool queued = false;
unsigned long flags;
struct rq *rq;
@@ -5399,7 +5339,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
goto out;
}
- if (cpumask_equal(tsk_cpus_allowed(p), new_mask))
+ cpumask_copy(&old_mask, tsk_cpus_allowed(p));
+ if (cpumask_equal(&old_mask, new_mask))
goto out;
if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
@@ -5436,12 +5377,16 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
set_task_cpu(p, cpumask_any_and(cpu_valid_mask, new_mask));
out:
- if (queued)
+ if (queued && !cpumask_subset(new_mask, &old_mask))
try_preempt(p, rq);
- task_grq_unlock(&flags);
-
if (running_wrong)
- preempt_schedule_common();
+ preempt_disable();
+ task_grq_unlock(p, &flags);
+
+ if (running_wrong) {
+ __schedule(true);
+ preempt_enable();
+ }
return ret;
}
@@ -5471,6 +5416,11 @@ static void bind_zero(int src_cpu)
cpumask_set_cpu(0, tsk_cpus_allowed(p));
p->zerobound = true;
bound++;
+ if (task_cpu(p) == src_cpu) {
+ set_task_cpu(p, 0);
+ if (task_running(p))
+ resched_task(p);
+ }
}
} while_each_thread(t, p);
@@ -7008,6 +6958,7 @@ void __init sched_init_smp(void)
#ifdef CONFIG_SCHED_SMT
bool smt_threads = false;
#endif
+ struct rq *rq;
cpumask_var_t non_isolated_cpus;
@@ -7045,7 +6996,7 @@ void __init sched_init_smp(void)
* nodes) are treated as very distant.
*/
for_each_online_cpu(cpu) {
- struct rq *rq = cpu_rq(cpu);
+ rq = cpu_rq(cpu);
/* First check if this cpu is in the same node */
for_each_domain(cpu, sd) {
@@ -7084,6 +7035,17 @@ void __init sched_init_smp(void)
}
#endif
}
+ for_each_possible_cpu(cpu) {
+ int total_cpus = 0, locality;
+
+ rq = cpu_rq(cpu);
+ for (locality = 0; locality <= 4; locality++) {
+ for_each_possible_cpu(other_cpu) {
+ if (rq->cpu_locality[other_cpu] == locality)
+ rq->rq_order[total_cpus++] = cpu_rq(other_cpu);
+ }
+ }
+ }
#ifdef CONFIG_SMT_NICE
if (smt_threads) {
check_siblings = &check_smt_siblings;
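Editorial note: the rq_order[] array built above is a stable bucket sort of the runqueues by cpu_locality (roughly: 0 = the CPU itself, 1 = SMT sibling, 2 = shared cache, 3 = same node, 4 = other nodes, per the initialisation in sched_init()), so try_preempt() walks CPUs from nearest to farthest. A standalone illustration with a made-up 4-CPU topology:

#include <stdio.h>

#define NR_TEST_CPUS 4

int main(void)
{
	/* hypothetical cpu_locality[] row for CPU 0: CPU 2 is its SMT
	 * sibling, CPUs 1 and 3 only share the package cache */
	int cpu_locality[NR_TEST_CPUS] = { 0, 2, 1, 2 };
	int rq_order[NR_TEST_CPUS];
	int total = 0, locality, cpu;

	for (locality = 0; locality <= 4; locality++)
		for (cpu = 0; cpu < NR_TEST_CPUS; cpu++)
			if (cpu_locality[cpu] == locality)
				rq_order[total++] = cpu;	/* same nested loops as the patch */

	for (cpu = 0; cpu < NR_TEST_CPUS; cpu++)
		printf("rq_order[%d] -> cpu%d\n", cpu, rq_order[cpu]);
	return 0;
}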
@@ -7095,7 +7057,8 @@ void __init sched_init_smp(void)
mutex_unlock(&sched_domains_mutex);
for_each_online_cpu(cpu) {
- struct rq *rq = cpu_rq(cpu);
+ rq = cpu_rq(cpu);
+
for_each_online_cpu(other_cpu) {
if (other_cpu <= cpu)
continue;
@@ -7220,6 +7183,10 @@ void __init sched_init(void)
else
rq->cpu_locality[j] = 4;
}
+ rq->rq_order = kmalloc(cpu_ids * sizeof(struct rq *), GFP_ATOMIC);
+ rq->rq_order[0] = rq;
+ for (j = 1; j < cpu_ids; j++)
+ rq->rq_order[j] = cpu_rq(j);
}
#endif
@@ -7323,7 +7290,6 @@ static inline void normalise_rt_tasks(void)
struct task_struct *g, *p;
unsigned long flags;
struct rq *rq;
- int queued;
read_lock(&tasklist_lock);
for_each_process_thread(g, p) {
@@ -7337,16 +7303,8 @@ static inline void normalise_rt_tasks(void)
continue;
rq = task_grq_lock(p, &flags);
- queued = task_queued(p);
- if (queued)
- dequeue_task(p);
__setscheduler(p, rq, SCHED_NORMAL, 0, false);
- if (queued) {
- enqueue_task(p, rq);
- try_preempt(p, rq);
- }
-
- task_grq_unlock(&flags);
+ task_grq_unlock(p, &flags);
}
read_unlock(&tasklist_lock);
}