From d0b2f91bede3bd5e3d24dd6803e56eee959c1797 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Fabian=20Silva=20Delgado?=
Date: Thu, 20 Oct 2016 00:10:27 -0300
Subject: Linux-libre 4.8.2-gnu

---
 kernel/sched/bfs.c | 500 ++++++++++++++++++++++++-----------------------------
 1 file changed, 229 insertions(+), 271 deletions(-)

(limited to 'kernel/sched/bfs.c')

diff --git a/kernel/sched/bfs.c b/kernel/sched/bfs.c
index 67f93e752..bb5bac4b2 100644
--- a/kernel/sched/bfs.c
+++ b/kernel/sched/bfs.c
@@ -24,7 +24,7 @@
  * 2007-07-01  Group scheduling enhancements by Srivatsa Vaddagiri
  * 2007-11-29  RT balancing improvements by Steven Rostedt, Gregory Haskins,
  *              Thomas Gleixner, Mike Kravetz
- *  now                Brainfuck deadline scheduling policy by Con Kolivas deletes
+ *  2009-08-13 Brainfuck deadline scheduling policy by Con Kolivas deletes
  *              a whole lot of those previous things.
  */
 
@@ -137,7 +137,7 @@
 
 void print_scheduler_version(void)
 {
-	printk(KERN_INFO "BFS CPU scheduler v0.502 by Con Kolivas.\n");
+	printk(KERN_INFO "BFS CPU scheduler v0.512 by Con Kolivas.\n");
 }
 
 /*
@@ -403,7 +403,6 @@ static inline void grq_lock_irq(void)
 }
 
 static inline void time_lock_grq(struct rq *rq)
-	__acquires(grq.lock)
 {
 	grq_lock();
 	update_clocks(rq);
@@ -429,86 +428,35 @@ static inline void grq_unlock_irqrestore(unsigned long *flags)
 
 static inline struct rq
 *task_grq_lock(struct task_struct *p, unsigned long *flags)
-	__acquires(grq.lock)
+	__acquires(p->pi_lock)
 {
-	grq_lock_irqsave(flags);
+	raw_spin_lock_irqsave(&p->pi_lock, *flags);
+	grq_lock();
 	return task_rq(p);
 }
 
 static inline struct rq
 *time_task_grq_lock(struct task_struct *p, unsigned long *flags)
-	__acquires(grq.lock)
 {
 	struct rq *rq = task_grq_lock(p, flags);
-	update_clocks(rq);
-	return rq;
-}
 
-static inline struct rq *task_grq_lock_irq(struct task_struct *p)
-	__acquires(grq.lock)
-{
-	grq_lock_irq();
-	return task_rq(p);
-}
-
-static inline void time_task_grq_lock_irq(struct task_struct *p)
-	__acquires(grq.lock)
-{
-	struct rq *rq = task_grq_lock_irq(p);
 	update_clocks(rq);
+	return rq;
 }
 
-static inline void task_grq_unlock_irq(void)
-	__releases(grq.lock)
-{
-	grq_unlock_irq();
-}
-
-static inline void task_grq_unlock(unsigned long *flags)
-	__releases(grq.lock)
-{
-	grq_unlock_irqrestore(flags);
-}
-
-/**
- * grunqueue_is_locked
- *
- * Returns true if the global runqueue is locked.
- * This interface allows printk to be called with the runqueue lock
- * held and know whether or not it is OK to wake up the klogd.
- */
-bool grunqueue_is_locked(void)
-{
-	return raw_spin_is_locked(&grq.lock);
-}
-
-void grq_unlock_wait(void)
-	__releases(grq.lock)
+static inline void task_grq_unlock(struct task_struct *p, unsigned long *flags)
+	__releases(p->pi_lock)
 {
-	smp_mb(); /* spin-unlock-wait is not a full memory barrier */
-	raw_spin_unlock_wait(&grq.lock);
+	grq_unlock();
+	raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
 }
 
 static inline void time_grq_lock(struct rq *rq, unsigned long *flags)
-	__acquires(grq.lock)
 {
 	local_irq_save(*flags);
 	time_lock_grq(rq);
 }
 
-static inline struct rq *__task_grq_lock(struct task_struct *p)
-	__acquires(grq.lock)
-{
-	grq_lock();
-	return task_rq(p);
-}
-
-static inline void __task_grq_unlock(void)
-	__releases(grq.lock)
-{
-	grq_unlock();
-}
-
 static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
 {
 }
@@ -539,6 +487,40 @@ static inline bool deadline_after(u64 deadline, u64 time)
 	return (deadline > time);
 }
 
+/*
+ * Deadline is "now" in niffies + (offset by priority). Setting the deadline
+ * is the key to everything. It distributes cpu fairly amongst tasks of the
+ * same nice value, it proportions cpu according to nice level, it means the
+ * task that last woke up the longest ago has the earliest deadline, thus
+ * ensuring that interactive tasks get low latency on wake up. The CPU
+ * proportion works out to the square of the virtual deadline difference, so
+ * this equation will give nice 19 3% CPU compared to nice 0.
+ */
+static inline u64 prio_deadline_diff(int user_prio)
+{
+	return (prio_ratios[user_prio] * rr_interval * (MS_TO_NS(1) / 128));
+}
+
+static inline u64 task_deadline_diff(struct task_struct *p)
+{
+	return prio_deadline_diff(TASK_USER_PRIO(p));
+}
+
+static inline u64 static_deadline_diff(int static_prio)
+{
+	return prio_deadline_diff(USER_PRIO(static_prio));
+}
+
+static inline int longest_deadline_diff(void)
+{
+	return prio_deadline_diff(39);
+}
+
+static inline int ms_longest_deadline_diff(void)
+{
+	return NS_TO_MS(longest_deadline_diff());
+}
+
 /*
  * A task that is not running or queued will not have a node set.
  * A task that is queued but not running will have a node set.
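
Aside (not part of the patch): the hunk above moves BFS's virtual-deadline helpers earlier in the file. As a rough, self-contained userspace sketch of what prio_deadline_diff() computes, the program below uses an assumed prio_ratios progression of +10% per nice level from a base of 128 and an assumed default rr_interval of 6 ms; those are BFS defaults to the best of my knowledge, but here they are assumptions, not something this patch changes.

/*
 * Illustrative userspace sketch of the BFS virtual-deadline offset.
 * Assumptions (not taken from this patch): prio_ratios[] grows by roughly
 * 10% per nice level from a base of 128, and rr_interval defaults to 6 ms.
 */
#include <stdio.h>

#define PRIO_RANGE	40		/* nice -20..19 maps to user prio 0..39 */
#define MS_TO_NS(x)	((x) * 1000000ULL)

static unsigned long long prio_ratios[PRIO_RANGE];
static int rr_interval = 6;		/* assumed default, in milliseconds */

/* Same shape as the kernel helper: the offset scales with the priority ratio. */
static unsigned long long prio_deadline_diff(int user_prio)
{
	return prio_ratios[user_prio] * rr_interval * (MS_TO_NS(1) / 128);
}

int main(void)
{
	int i;

	prio_ratios[0] = 128;
	for (i = 1; i < PRIO_RANGE; i++)
		prio_ratios[i] = prio_ratios[i - 1] * 11 / 10;

	printf("nice  0 offset: %llu ns\n", prio_deadline_diff(0));
	printf("nice 19 offset: %llu ns (about %llux the nice 0 offset)\n",
	       prio_deadline_diff(39),
	       prio_deadline_diff(39) / prio_deadline_diff(0));
	return 0;
}

Running it shows the nice 0 offset is roughly one rr_interval, with the offset growing steeply towards nice 19 under these assumed values.
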
@@ -561,14 +543,23 @@ static void dequeue_task(struct task_struct *p)
 	sched_info_dequeued(task_rq(p), p);
 }
 
+#ifdef CONFIG_PREEMPT_RCU
+static bool rcu_read_critical(struct task_struct *p)
+{
+	return p->rcu_read_unlock_special.b.blocked;
+}
+#else /* CONFIG_PREEMPT_RCU */
+#define rcu_read_critical(p) (false)
+#endif /* CONFIG_PREEMPT_RCU */
+
 /*
  * To determine if it's safe for a task of SCHED_IDLEPRIO to actually run as
  * an idle task, we ensure none of the following conditions are met.
  */
 static bool idleprio_suitable(struct task_struct *p)
 {
-	return (!freezing(p) && !signal_pending(p) &&
-		!(task_contributes_to_load(p)) && !(p->flags & (PF_EXITING)));
+	return (!(task_contributes_to_load(p)) && !(p->flags & (PF_EXITING)) &&
+		!signal_pending(p) && !rcu_read_critical(p) && !freezing(p));
 }
 
 /*
@@ -612,9 +603,13 @@ static void enqueue_task(struct task_struct *p, struct rq *rq)
 		sl_id = p->prio;
 	else {
 		sl_id = p->deadline;
-		/* Set it to cope with 4 left shifts with locality_diff */
-		if (p->prio == IDLE_PRIO)
-			sl_id |= 0x0F00000000000000;
+		if (idleprio_task(p)) {
+			/* Set it to cope with 4 left shifts with locality_diff */
+			if (p->prio == IDLE_PRIO)
+				sl_id |= 0x00FF000000000000;
+			else
+				sl_id += longest_deadline_diff();
+		}
 	}
 	/*
 	 * Some architectures don't have better than microsecond resolution
@@ -1008,15 +1003,18 @@ static inline void deactivate_task(struct task_struct *p, struct rq *rq)
 #ifdef CONFIG_SMP
 void set_task_cpu(struct task_struct *p, unsigned int cpu)
 {
-	unsigned int tcpu;
-
 #ifdef CONFIG_LOCKDEP
 	/*
-	 * The caller should hold grq lock.
+	 * The caller should hold either p->pi_lock or grq lock, when changing
+	 * a task's CPU. ->pi_lock for waking tasks, grq lock for runnable tasks.
+	 *
+	 * Furthermore, all task_rq users should acquire both locks, see
+	 * task_grq_lock().
 	 */
-	WARN_ON_ONCE(debug_locks && !lockdep_is_held(&grq.lock));
+	WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
+				      lockdep_is_held(&grq.lock)));
 #endif
-	if ((tcpu = task_cpu(p)) == cpu)
+	if (task_cpu(p) == cpu)
 		return;
 	trace_sched_migrate_task(p, cpu);
 	perf_event_task_migrate(p);
@@ -1027,6 +1025,7 @@ void set_task_cpu(struct task_struct *p, unsigned int cpu)
 	 * per-task data have been completed by this moment.
 	 */
 	smp_wmb();
+
 	if (p->on_rq) {
 		struct rq *rq = task_rq(p);
 
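
Aside (not part of the patch): the set_task_cpu() rule spelled out above — hold either p->pi_lock or the grq lock, and take both via task_grq_lock() when task_rq() has to stay stable — amounts to a fixed lock ordering: per-task lock first, global lock second, released in reverse. A minimal userspace analogue with pthreads, using invented names, is:

/*
 * Userspace analogue of the task_grq_lock()/task_grq_unlock() pairing above:
 * take the per-task lock first, then the single global lock, and release in
 * the reverse order. All names here are illustrative only.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;

struct task {
	pthread_mutex_t pi_lock;
	int cpu;
};

/* Acquire the per-task lock, then the global lock (consistent ordering). */
static void task_global_lock(struct task *p)
{
	pthread_mutex_lock(&p->pi_lock);
	pthread_mutex_lock(&global_lock);
}

/* Release in the reverse order of acquisition. */
static void task_global_unlock(struct task *p)
{
	pthread_mutex_unlock(&global_lock);
	pthread_mutex_unlock(&p->pi_lock);
}

int main(void)
{
	static struct task t = { .pi_lock = PTHREAD_MUTEX_INITIALIZER, .cpu = 0 };

	task_global_lock(&t);
	t.cpu = 1;	/* safe: both locks held, mirroring the rule for set_task_cpu() */
	task_global_unlock(&t);
	printf("cpu = %d\n", t.cpu);
	return 0;
}
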
@@ -1166,7 +1165,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
 		ncsw = 0;
 		if (!match_state || p->state == match_state)
 			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
-		task_grq_unlock(&flags);
+		task_grq_unlock(p, &flags);
 
 		/*
 		 * If it changed from the expected state, bail out now.
@@ -1292,9 +1291,7 @@ static inline bool needs_other_cpu(struct task_struct *p, int cpu)
 
 static void try_preempt(struct task_struct *p, struct rq *this_rq)
 {
-	int cpu, pcpu, highest_prio, highest_cpu;
-	struct rq *highest_prio_rq;
-	u64 latest_deadline;
+	int i, this_entries = this_rq->soft_affined;
 	cpumask_t tmp;
 
 	if (suitable_idle_cpus(p) && resched_best_idle(p))
@@ -1306,56 +1303,32 @@ static void try_preempt(struct task_struct *p, struct rq *this_rq)
 
 	cpumask_and(&tmp, &cpu_online_map, &p->cpus_allowed);
 
-	/* See if this task can preempt the task on the current CPU first. */
-	pcpu = cpu_of(this_rq);
-	if (likely(cpumask_test_cpu(pcpu, &tmp))) {
-		if (smt_schedule(p, this_rq) && can_preempt(p, this_rq->rq_prio, this_rq->rq_deadline)) {
-			resched_curr(this_rq);
-			return;
-		}
-		cpumask_clear_cpu(pcpu, &tmp);
-	}
-
-	highest_prio = latest_deadline = 0;
-	highest_prio_rq = NULL;
-
-	/* Now look for the CPU with the latest deadline */
-	for_each_cpu(cpu, &tmp) {
-		struct rq *rq;
-		int rq_prio;
-		u64 dl;
+	/*
+	 * We iterate over CPUs in locality order using rq_order, finding the
+	 * first one we can preempt if possible, thus staying closest in
+	 * locality.
+	 */
+	for (i = 0; i < num_possible_cpus(); i++) {
+		struct rq *rq = this_rq->rq_order[i];
 
-		rq = cpu_rq(cpu);
-		rq_prio = rq->rq_prio;
-		if (rq_prio < highest_prio)
+		if (!cpumask_test_cpu(rq->cpu, &tmp))
 			continue;
-
-		dl = rq->rq_deadline;
-		if (!sched_interactive && pcpu != cpu)
-			dl <<= locality_diff(pcpu, rq);
-		if (rq_prio > highest_prio ||
-		    deadline_after(dl, latest_deadline)) {
-			latest_deadline = dl;
-			highest_prio = rq_prio;
-			highest_cpu = cpu;
-			highest_prio_rq = rq;
+		if (!sched_interactive && rq != this_rq && rq->soft_affined <= this_entries)
+			continue;
+		if (smt_schedule(p, rq) && can_preempt(p, rq->rq_prio, rq->rq_deadline)) {
+			/*
+			 * If we have decided this task should preempt this CPU,
+			 * set the task's CPU to match thereby speeding up matching
+			 * this task in earliest_deadline_task.
+			 */
+			set_task_cpu(p, rq->cpu);
+			resched_curr(rq);
+			return;
 		}
 	}
-
-	if (unlikely(!highest_prio_rq))
-		return;
-	if (!smt_schedule(p, highest_prio_rq))
-		return;
-	if (can_preempt(p, highest_prio, latest_deadline)) {
-		/*
-		 * If we have decided this task should preempt this CPU,
-		 * set the task's CPU to match thereby speeding up matching
-		 * this task in earliest_deadline_task.
-		 */
-		set_task_cpu(p, highest_cpu);
-		resched_curr(highest_prio_rq);
-	}
 }
+
 static int __set_cpus_allowed_ptr(struct task_struct *p,
 				  const struct cpumask *new_mask, bool check);
 #else /* CONFIG_SMP */
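
Aside (not part of the patch): the rewritten try_preempt() above walks runqueues in rq_order, i.e. closest locality first, and preempts the first runqueue it is allowed to. A toy userspace model of that scan, with invented types and sample values, looks like this:

/*
 * Toy model of the locality-ordered preemption scan in the rewritten
 * try_preempt(): walk candidate runqueues from closest to most distant and
 * stop at the first one the waking task may preempt. All data is invented.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

struct toy_rq {
	int cpu;
	int curr_prio;		/* priority of the task currently running */
	bool allowed;		/* is the waking task allowed on this CPU? */
};

/* rq_order[i]: runqueues sorted by locality, index 0 being this CPU itself. */
static struct toy_rq *scan_for_preempt(struct toy_rq *rq_order[NR_CPUS],
				       int waking_prio)
{
	int i;

	for (i = 0; i < NR_CPUS; i++) {
		struct toy_rq *rq = rq_order[i];

		if (!rq->allowed)
			continue;
		/* Lower value == higher priority, as with kernel prio. */
		if (waking_prio < rq->curr_prio)
			return rq;	/* first (closest) preemptible CPU wins */
	}
	return NULL;
}

int main(void)
{
	struct toy_rq rqs[NR_CPUS] = {
		{ .cpu = 0, .curr_prio = 100, .allowed = true },
		{ .cpu = 1, .curr_prio = 120, .allowed = true },
		{ .cpu = 2, .curr_prio = 140, .allowed = false },
		{ .cpu = 3, .curr_prio = 140, .allowed = true },
	};
	struct toy_rq *order[NR_CPUS] = { &rqs[0], &rqs[1], &rqs[2], &rqs[3] };
	struct toy_rq *victim = scan_for_preempt(order, 110);

	if (victim)
		printf("preempt CPU %d\n", victim->cpu);	/* CPU 1 here */
	return 0;
}

The point of stopping at the first hit is the same as in the patch: among all runqueues the task could preempt, the closest one in locality is preferred.
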
@@ -1501,8 +1474,6 @@ static bool try_to_wake_up(struct task_struct *p, unsigned int state,
 	struct rq *rq;
 	int cpu;
 
-	get_cpu();
-
 	/*
 	 * If we are going to wake up a thread waiting for CONDITION we
 	 * need to ensure that CONDITION=1 done by the caller can not be
@@ -1533,13 +1504,11 @@ static bool try_to_wake_up(struct task_struct *p, unsigned int state,
 out_running:
 	ttwu_post_activation(p, rq, success);
 out_unlock:
-	task_grq_unlock(&flags);
+	task_grq_unlock(p, &flags);
 
 	if (schedstat_enabled())
 		ttwu_stat(p, cpu, wake_flags);
 
-	put_cpu();
-
 	return success;
 }
 
@@ -1628,6 +1597,13 @@ int sched_fork(unsigned long __maybe_unused clone_flags, struct task_struct *p)
 	p->utime_pc = 0;
 	skiplist_node_init(&p->node);
 
+	/*
+	 * We mark the process as NEW here. This guarantees that
+	 * nobody will actually run it, and a signal or other external
+	 * event cannot wake it up and insert it on the runqueue either.
+	 */
+	p->state = TASK_NEW;
+
 	/*
 	 * Revert to default priority/policy on fork if requested.
 	 */
@@ -1744,12 +1720,16 @@ static inline void init_schedstats(void) {}
  */
 void wake_up_new_task(struct task_struct *p)
 {
-	struct task_struct *parent;
+	struct task_struct *parent, *rq_curr;
+	struct rq *rq, *new_rq;
 	unsigned long flags;
-	struct rq *rq;
 
 	parent = p->parent;
 	rq = task_grq_lock(p, &flags);
+	if (unlikely(needs_other_cpu(p, task_cpu(p))))
+		set_task_cpu(p, cpumask_any(tsk_cpus_allowed(p)));
+	rq_curr = rq->curr;
+	p->state = TASK_RUNNING;
 
 	/*
 	 * Reinit new task deadline as its creator deadline could have changed
@@ -1757,22 +1737,20 @@ void wake_up_new_task(struct task_struct *p)
 	 */
 	p->deadline = rq->rq_deadline;
 
-	/*
-	 * If the task is a new process, current and parent are the same. If
-	 * the task is a new thread in the thread group, it will have much more
-	 * in common with current than with the parent.
-	 */
-	set_task_cpu(p, task_cpu(rq->curr));
+	/* The new task might not be able to run on the same CPU as rq->curr */
+	if (unlikely(needs_other_cpu(p, task_cpu(p)))) {
+		set_task_cpu(p, cpumask_any(tsk_cpus_allowed(p)));
+		new_rq = task_rq(p);
+	} else
+		new_rq = rq;
 
 	/*
 	 * Make sure we do not leak PI boosting priority to the child.
 	 */
-	p->prio = rq->curr->normal_prio;
+	p->prio = rq_curr->normal_prio;
 
 	activate_task(p, rq);
 	trace_sched_wakeup_new(p);
-	if (unlikely(p->policy == SCHED_FIFO))
-		goto after_ts_init;
 
 	/*
 	 * Share the timeslice between parent and child, thus the
@@ -1784,33 +1762,39 @@ void wake_up_new_task(struct task_struct *p)
 	 * is always equal to current->deadline.
 	 */
 	p->last_ran = rq->rq_last_ran;
-	if (likely(rq->rq_time_slice >= RESCHED_US * 2)) {
+	if (likely(rq_curr->policy != SCHED_FIFO)) {
 		rq->rq_time_slice /= 2;
-		p->time_slice = rq->rq_time_slice;
-after_ts_init:
-		if (rq->curr == parent && !suitable_idle_cpus(p)) {
+		if (unlikely(rq->rq_time_slice < RESCHED_US)) {
 			/*
-			 * The VM isn't cloned, so we're in a good position to
-			 * do child-runs-first in anticipation of an exec. This
-			 * usually avoids a lot of COW overhead.
+			 * Forking task has run out of timeslice. Reschedule it and
+			 * start its child with a new time slice and deadline. The
+			 * child will end up running first because its deadline will
+			 * be slightly earlier.
 			 */
-			__set_tsk_resched(parent);
-		} else
-			try_preempt(p, rq);
-	} else {
-		if (rq->curr == parent) {
-			/*
-			 * Forking task has run out of timeslice. Reschedule it and
-			 * start its child with a new time slice and deadline. The
-			 * child will end up running first because its deadline will
-			 * be slightly earlier.
-			 */
 			rq->rq_time_slice = 0;
-			__set_tsk_resched(parent);
+			__set_tsk_resched(rq_curr);
+			time_slice_expired(p);
+			if (suitable_idle_cpus(p))
+				resched_best_idle(p);
+			else if (unlikely(rq != new_rq))
+				try_preempt(p, new_rq);
+		} else {
+			p->time_slice = rq->rq_time_slice;
+			if (rq_curr == parent && rq == new_rq && !suitable_idle_cpus(p)) {
+				/*
+				 * The VM isn't cloned, so we're in a good position to
+				 * do child-runs-first in anticipation of an exec. This
+				 * usually avoids a lot of COW overhead.
+				 */
+				__set_tsk_resched(rq_curr);
+			} else
+				try_preempt(p, new_rq);
 		}
+	} else {
 		time_slice_expired(p);
+		try_preempt(p, new_rq);
 	}
-	task_grq_unlock(&flags);
+	task_grq_unlock(p, &flags);
 }
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
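
Aside (not part of the patch): the reworked wake_up_new_task() above halves the parent's remaining timeslice and, if the remainder drops below the rescheduling threshold, expires the parent and starts the child on a fresh slice. In isolation the decision looks roughly like the sketch below; RESCHED_US and the sample numbers are invented for illustration.

/*
 * Isolated sketch of the timeslice-sharing decision in the reworked
 * wake_up_new_task(): the parent keeps half of its remaining slice and the
 * child inherits the other half, unless the remainder is too small to be
 * worth running, in which case the parent is rescheduled and the child gets
 * a fresh slice. RESCHED_US and the sample values are illustrative only.
 */
#include <stdbool.h>
#include <stdio.h>

#define RESCHED_US 100	/* assumed minimum useful slice, in microseconds */

struct slice_decision {
	int parent_slice_us;
	int child_slice_us;
	bool resched_parent;	/* parent expired, child starts fresh */
};

static struct slice_decision share_timeslice(int parent_slice_us,
					     int fresh_slice_us)
{
	struct slice_decision d = { 0, 0, false };

	d.parent_slice_us = parent_slice_us / 2;
	if (d.parent_slice_us < RESCHED_US) {
		/* Parent has effectively run out: expire it, refill the child. */
		d.parent_slice_us = 0;
		d.child_slice_us = fresh_slice_us;
		d.resched_parent = true;
	} else {
		d.child_slice_us = d.parent_slice_us;
	}
	return d;
}

int main(void)
{
	struct slice_decision d = share_timeslice(6000, 6000);

	printf("parent=%dus child=%dus resched=%d\n",
	       d.parent_slice_us, d.child_slice_us, d.resched_parent);
	return 0;
}
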
@@ -2724,7 +2708,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 
 	rq = task_grq_lock(p, &flags);
 	ns = p->sched_time + do_task_delta_exec(p, rq);
-	task_grq_unlock(&flags);
+	task_grq_unlock(p, &flags);
 
 	return ns;
 }
@@ -2978,7 +2962,7 @@ static void task_running_tick(struct rq *rq)
 
 	grq_lock();
 	requeue_task(p);
-	__set_tsk_resched(p);
+	resched_task(p);
 	grq_unlock();
 }
 
@@ -3082,40 +3066,6 @@ static inline void preempt_latency_start(int val) { }
 static inline void preempt_latency_stop(int val) { }
 #endif
 
-/*
- * Deadline is "now" in niffies + (offset by priority). Setting the deadline
- * is the key to everything. It distributes cpu fairly amongst tasks of the
- * same nice value, it proportions cpu according to nice level, it means the
- * task that last woke up the longest ago has the earliest deadline, thus
- * ensuring that interactive tasks get low latency on wake up. The CPU
- * proportion works out to the square of the virtual deadline difference, so
- * this equation will give nice 19 3% CPU compared to nice 0.
- */
-static inline u64 prio_deadline_diff(int user_prio)
-{
-	return (prio_ratios[user_prio] * rr_interval * (MS_TO_NS(1) / 128));
-}
-
-static inline u64 task_deadline_diff(struct task_struct *p)
-{
-	return prio_deadline_diff(TASK_USER_PRIO(p));
-}
-
-static inline u64 static_deadline_diff(int static_prio)
-{
-	return prio_deadline_diff(USER_PRIO(static_prio));
-}
-
-static inline int longest_deadline_diff(void)
-{
-	return prio_deadline_diff(39);
-}
-
-static inline int ms_longest_deadline_diff(void)
-{
-	return NS_TO_MS(longest_deadline_diff());
-}
-
 /*
  * The time_slice is only refilled when it is empty and that is when we set a
  * new deadline.
@@ -3215,13 +3165,12 @@ found_middle:
 static inline struct
 task_struct *earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle)
 {
-	struct task_struct *edt = idle;
 	skiplist_node *node = &grq.node;
+	struct task_struct *edt = idle;
 	u64 earliest_deadline = ~0ULL;
 
 	while ((node = node->next[0]) != &grq.node) {
 		struct task_struct *p = node->value;
-		int tcpu;
 
 		/* Make sure affinity is ok */
 		if (needs_other_cpu(p, cpu))
@@ -3230,22 +3179,24 @@ task_struct *earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *
 		if (!smt_schedule(p, rq))
 			continue;
 
-		if (!sched_interactive && (tcpu = task_cpu(p)) != cpu) {
-			u64 dl = p->deadline << locality_diff(tcpu, rq);
+		if (!sched_interactive) {
+			int tcpu;
+
+			if ((tcpu = task_cpu(p)) != cpu) {
+				u64 dl = p->deadline << locality_diff(tcpu, rq);
 
-			if (unlikely(!deadline_before(dl, earliest_deadline)))
+				if (!deadline_before(dl, earliest_deadline))
+					continue;
+				earliest_deadline = dl;
+				edt = p;
+				/* We continue even though we've found the earliest
+				 * deadline task as the locality offset means there
+				 * may be a better candidate after it. */
 				continue;
-			earliest_deadline = dl;
-			edt = p;
-			/* We continue even though we've found the earliest
-			 * deadline task as the locality offset means there
-			 * may be a better candidate after it. */
-			continue;
+			}
 		}
-		/* This wouldn't happen if we encountered a better deadline from
-		 * another CPU and have already set edt. */
-		if (likely(p->deadline < earliest_deadline))
-			edt = p;
+		/* We've encountered the best deadline local task */
+		edt = p;
 		break;
 	}
 	if (likely(edt != idle))
@@ -3275,6 +3226,9 @@ static noinline void __schedule_bug(struct task_struct *prev)
 		pr_cont("\n");
 	}
 #endif
+	if (panic_on_warn)
+		panic("scheduling while atomic\n");
+
 	dump_stack();
 	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 }
@@ -3316,10 +3270,6 @@ static inline void set_rq_task(struct rq *rq, struct task_struct *p)
 	rq->rq_mm = p->mm;
 	rq->rq_smt_bias = p->smt_bias;
 #endif
-	if (p != rq->idle)
-		rq->rq_running = true;
-	else
-		rq->rq_running = false;
 }
 
 static void reset_rq_task(struct rq *rq, struct task_struct *p)
@@ -3353,7 +3303,7 @@ static void check_smt_siblings(struct rq *this_rq)
 		if (unlikely(!rq->online))
 			continue;
 		p = rq->curr;
-		if (!smt_should_schedule(p, this_rq)) {
+		if (!smt_schedule(p, this_rq)) {
 			set_tsk_need_resched(p);
 			smp_send_reschedule(other_cpu);
 		}
@@ -3546,8 +3496,6 @@ static void __sched notrace __schedule(bool preempt)
 
 		trace_sched_switch(preempt, prev, next);
 		rq = context_switch(rq, prev, next); /* unlocks the grq */
-		cpu = cpu_of(rq);
-		idle = rq->idle;
 	} else {
 		check_siblings(rq);
 		grq_unlock_irq();
@@ -3766,8 +3714,8 @@ EXPORT_SYMBOL(default_wake_function);
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
 	unsigned long flags;
-	int queued, oldprio;
 	struct rq *rq;
+	int oldprio;
 
 	BUG_ON(prio < 0 || prio > MAX_PRIO);
 
@@ -3793,19 +3741,18 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 
 	trace_sched_pi_setprio(p, prio);
 	oldprio = p->prio;
-	queued = task_queued(p);
-	if (queued)
-		dequeue_task(p);
 	p->prio = prio;
-	if (task_running(p) && prio > oldprio)
-		resched_task(p);
-	if (queued) {
+	if (task_running(p)){
+		if (prio > oldprio)
+			resched_task(p);
+	} else if (task_queued(p)) {
+		dequeue_task(p);
 		enqueue_task(p, rq);
-		try_preempt(p, rq);
+		if (prio < oldprio)
+			try_preempt(p, rq);
 	}
-
 out_unlock:
-	task_grq_unlock(&flags);
+	task_grq_unlock(p, &flags);
 }
 #endif
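
Aside (not part of the patch): in the earliest_deadline_task() hunk a few hunks above, a local task is taken as soon as it is reached, while a remote task's deadline is penalised by a left shift of the locality distance and the walk keeps going in case a later entry still beats it. An array-based userspace model of that comparison is sketched below; the kernel walks a deadline-sorted skiplist, and all data here is invented.

/*
 * Array-based model of the deadline comparison in earliest_deadline_task():
 * a runnable local task wins on sight; a remote task's deadline is penalised
 * by left-shifting it by the locality distance, and the scan continues
 * because a later entry may still win.
 */
#include <stdio.h>

struct toy_task {
	const char *name;
	unsigned long long deadline;
	int locality;		/* 0 == same CPU, larger == more distant */
};

static const struct toy_task *pick_task(const struct toy_task *tasks, int n)
{
	unsigned long long earliest = ~0ULL;
	const struct toy_task *best = NULL;
	int i;

	for (i = 0; i < n; i++) {
		const struct toy_task *p = &tasks[i];

		if (p->locality) {
			unsigned long long dl = p->deadline << p->locality;

			if (dl < earliest) {
				earliest = dl;
				best = p;
			}
			continue;	/* a later entry may still win */
		}
		return p;		/* best local task: stop the walk */
	}
	return best;
}

int main(void)
{
	const struct toy_task tasks[] = {
		{ "remote-a", 1000, 2 },	/* effective deadline 4000 */
		{ "local-b",  3000, 0 },	/* local: chosen on sight */
	};

	printf("picked %s\n", pick_task(tasks, 2)->name);
	return 0;
}
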
@@ -3821,7 +3768,7 @@ static inline void adjust_deadline(struct task_struct *p, int new_prio)
 
 void set_user_nice(struct task_struct *p, long nice)
 {
-	int queued, new_static, old_static;
+	int new_static, old_static;
 	unsigned long flags;
 	struct rq *rq;
 
@@ -3843,16 +3790,14 @@ void set_user_nice(struct task_struct *p, long nice)
 		p->static_prio = new_static;
 		goto out_unlock;
 	}
-	queued = task_queued(p);
-	if (queued)
-		dequeue_task(p);
 
 	adjust_deadline(p, new_static);
 	old_static = p->static_prio;
 	p->static_prio = new_static;
 	p->prio = effective_prio(p);
 
-	if (queued) {
+	if (task_queued(p)) {
+		dequeue_task(p);
 		enqueue_task(p, rq);
 		if (new_static < old_static)
 			try_preempt(p, rq);
@@ -3862,7 +3807,7 @@ void set_user_nice(struct task_struct *p, long nice)
 			resched_task(p);
 	}
 out_unlock:
-	task_grq_unlock(&flags);
+	task_grq_unlock(p, &flags);
 }
 EXPORT_SYMBOL(set_user_nice);
 
@@ -4002,11 +3947,15 @@ static void __setscheduler(struct task_struct *p, struct rq *rq, int policy,
 			p->prio = rt_mutex_get_effective_prio(p, p->normal_prio);
 	} else
 		p->prio = p->normal_prio;
+
 	if (task_running(p)) {
 		reset_rq_task(rq, p);
-		/* Resched only if we might now be preempted */
-		if (p->prio > oldprio || p->rt_priority > oldrtprio)
-			resched_task(p);
+		resched_task(p);
+	} else if (task_queued(p)) {
+		dequeue_task(p);
+		enqueue_task(p, rq);
+		if (p->prio < oldprio || p->rt_priority > oldrtprio)
+			try_preempt(p, rq);
 	}
 }
 
@@ -4031,8 +3980,8 @@ __sched_setscheduler(struct task_struct *p, int policy,
 		     const struct sched_param *param, bool user, bool pi)
 {
 	struct sched_param zero_param = { .sched_priority = 0 };
-	int queued, retval, oldpolicy = -1;
 	unsigned long flags, rlim_rtprio = 0;
+	int retval, oldpolicy = -1;
 	int reset_on_fork;
 	struct rq *rq;
 
@@ -4142,20 +4091,17 @@ recheck:
 	/*
 	 * make sure no PI-waiters arrive (or leave) while we are
 	 * changing the priority of the task:
-	 */
-	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	/*
+	 *
 	 * To be able to change p->policy safely, the grunqueue lock must be
 	 * held.
 	 */
-	rq = __task_grq_lock(p);
+	rq = task_grq_lock(p, &flags);
 	/*
 	 * Changing the policy of the stop threads its a very bad idea
 	 */
 	if (p == rq->stop) {
-		__task_grq_unlock();
-		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		task_grq_unlock(p, &flags);
 		return -EINVAL;
 	}
 
@@ -4165,31 +4111,21 @@ recheck:
 	if (unlikely(policy == p->policy && (!is_rt_policy(policy) ||
 	    param->sched_priority == p->rt_priority))) {
-		__task_grq_unlock();
-		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		task_grq_unlock(p, &flags);
 		return 0;
 	}
 
 	/* recheck policy now with rq lock held */
 	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
 		policy = oldpolicy = -1;
-		__task_grq_unlock();
-		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		task_grq_unlock(p, &flags);
 		goto recheck;
 	}
 	update_clocks(rq);
 	p->sched_reset_on_fork = reset_on_fork;
 
-	queued = task_queued(p);
-	if (queued)
-		dequeue_task(p);
 	__setscheduler(p, rq, policy, param->sched_priority, pi);
-	if (queued) {
-		enqueue_task(p, rq);
-		try_preempt(p, rq);
-	}
-	__task_grq_unlock();
-	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+	task_grq_unlock(p, &flags);
 
 	if (pi)
 		rt_mutex_adjust_pi(p);
@@ -4706,7 +4642,8 @@ out_unlock:
  * @len: length in bytes of the bitmask pointed to by user_mask_ptr
  * @user_mask_ptr: user-space pointer to hold the current cpu mask
  *
- * Return: 0 on success. An error code otherwise.
+ * Return: size of CPU mask copied to user_mask_ptr on success. An
+ * error code otherwise.
  */
 SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
 		unsigned long __user *, user_mask_ptr)
@@ -5113,6 +5050,8 @@ void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_ma
 void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 {
 	cpumask_copy(tsk_cpus_allowed(p), new_mask);
+	if (needs_other_cpu(p, task_cpu(p)))
+		set_task_cpu(p, cpumask_any(tsk_cpus_allowed(p)));
 }
 #endif
 
@@ -5376,6 +5315,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 {
 	const struct cpumask *cpu_valid_mask = cpu_active_mask;
 	bool running_wrong = false;
+	struct cpumask old_mask;
 	bool queued = false;
 	unsigned long flags;
 	struct rq *rq;
@@ -5399,7 +5339,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 		goto out;
 	}
 
-	if (cpumask_equal(tsk_cpus_allowed(p), new_mask))
+	cpumask_copy(&old_mask, tsk_cpus_allowed(p));
+	if (cpumask_equal(&old_mask, new_mask))
 		goto out;
 
 	if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
@@ -5436,12 +5377,16 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 		set_task_cpu(p, cpumask_any_and(cpu_valid_mask, new_mask));
 
 out:
-	if (queued)
+	if (queued && !cpumask_subset(new_mask, &old_mask))
 		try_preempt(p, rq);
-	task_grq_unlock(&flags);
-
 	if (running_wrong)
-		preempt_schedule_common();
+		preempt_disable();
+	task_grq_unlock(p, &flags);
+
+	if (running_wrong) {
+		__schedule(true);
+		preempt_enable();
+	}
 
 	return ret;
 }
@@ -5471,6 +5416,11 @@ static void bind_zero(int src_cpu)
 			cpumask_set_cpu(0, tsk_cpus_allowed(p));
 			p->zerobound = true;
 			bound++;
+			if (task_cpu(p) == src_cpu) {
+				set_task_cpu(p, 0);
+				if (task_running(p))
+					resched_task(p);
+			}
 		}
 	} while_each_thread(t, p);
 
@@ -7008,6 +6958,7 @@ void __init sched_init_smp(void)
 #ifdef CONFIG_SCHED_SMT
 	bool smt_threads = false;
 #endif
+	struct rq *rq;
 	cpumask_var_t non_isolated_cpus;
 
@@ -7045,7 +6996,7 @@ void __init sched_init_smp(void)
 	 * nodes) are treated as very distant.
 	 */
 	for_each_online_cpu(cpu) {
-		struct rq *rq = cpu_rq(cpu);
+		rq = cpu_rq(cpu);
 
 		/* First check if this cpu is in the same node */
 		for_each_domain(cpu, sd) {
@@ -7084,6 +7035,17 @@ void __init sched_init_smp(void)
 			}
 #endif
 		}
+	for_each_possible_cpu(cpu) {
+		int total_cpus = 0, locality;
+
+		rq = cpu_rq(cpu);
+		for (locality = 0; locality <= 4; locality++) {
+			for_each_possible_cpu(other_cpu) {
+				if (rq->cpu_locality[other_cpu] == locality)
+					rq->rq_order[total_cpus++] = cpu_rq(other_cpu);
+			}
+		}
+	}
 #ifdef CONFIG_SMT_NICE
 	if (smt_threads) {
 		check_siblings = &check_smt_siblings;
@@ -7095,7 +7057,8 @@ void __init sched_init_smp(void)
 	mutex_unlock(&sched_domains_mutex);
 
 	for_each_online_cpu(cpu) {
-		struct rq *rq = cpu_rq(cpu);
+		rq = cpu_rq(cpu);
+
 		for_each_online_cpu(other_cpu) {
 			if (other_cpu <= cpu)
 				continue;
@@ -7220,6 +7183,10 @@ void __init sched_init(void)
 			else
 				rq->cpu_locality[j] = 4;
 		}
+		rq->rq_order = kmalloc(cpu_ids * sizeof(struct rq *), GFP_ATOMIC);
+		rq->rq_order[0] = rq;
+		for (j = 1; j < cpu_ids; j++)
+			rq->rq_order[j] = cpu_rq(j);
 	}
 #endif
 
@@ -7323,7 +7290,6 @@ static inline void normalise_rt_tasks(void)
 	struct task_struct *g, *p;
 	unsigned long flags;
 	struct rq *rq;
-	int queued;
 
 	read_lock(&tasklist_lock);
 	for_each_process_thread(g, p) {
@@ -7337,16 +7303,8 @@ static inline void normalise_rt_tasks(void)
 			continue;
 
 		rq = task_grq_lock(p, &flags);
-		queued = task_queued(p);
-		if (queued)
-			dequeue_task(p);
 		__setscheduler(p, rq, SCHED_NORMAL, 0, false);
-		if (queued) {
-			enqueue_task(p, rq);
-			try_preempt(p, rq);
-		}
-
-		task_grq_unlock(&flags);
+		task_grq_unlock(p, &flags);
 	}
 	read_unlock(&tasklist_lock);
 }
-- 
cgit v1.2.3
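
Aside (not part of the patch): the sched_init_smp() and sched_init() hunks above allocate and fill each runqueue's rq_order[], listing every runqueue grouped by increasing locality so that index 0 is always the CPU's own runqueue and more distant CPUs follow. A userspace model of that construction, with an invented 4-CPU locality matrix and the same 0..4 locality range, is:

/*
 * Userspace model of the rq_order[] construction added to sched_init_smp():
 * for each CPU, list every runqueue grouped by increasing locality value.
 * The 0..4 locality range mirrors the patch; the sample matrix is invented.
 */
#include <stdio.h>

#define NR_CPUS		4
#define MAX_LOCALITY	4

/* cpu_locality[i][j]: distance from CPU i to CPU j (0 == same CPU). */
static const int cpu_locality[NR_CPUS][NR_CPUS] = {
	{ 0, 1, 3, 3 },
	{ 1, 0, 3, 3 },
	{ 3, 3, 0, 1 },
	{ 3, 3, 1, 0 },
};

static int rq_order[NR_CPUS][NR_CPUS];

int main(void)
{
	int cpu, other, locality;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		int total = 0;

		for (locality = 0; locality <= MAX_LOCALITY; locality++)
			for (other = 0; other < NR_CPUS; other++)
				if (cpu_locality[cpu][other] == locality)
					rq_order[cpu][total++] = other;

		printf("cpu %d order:", cpu);
		for (other = 0; other < NR_CPUS; other++)
			printf(" %d", rq_order[cpu][other]);
		printf("\n");
	}
	return 0;
}

This is the ordering try_preempt() relies on when it scans this_rq->rq_order earlier in the patch.
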