Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched/MuQSS.c  | 250
-rw-r--r--  kernel/sched/MuQSS.h  |   3
-rw-r--r--  kernel/skip_list.c    |  34
3 files changed, 124 insertions(+), 163 deletions(-)
diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c
index 6a656ad4b..bd9b1f13f 100644
--- a/kernel/sched/MuQSS.c
+++ b/kernel/sched/MuQSS.c
@@ -135,7 +135,7 @@
void print_scheduler_version(void)
{
- printk(KERN_INFO "MuQSS CPU scheduler v0.114 by Con Kolivas.\n");
+ printk(KERN_INFO "MuQSS CPU scheduler v0.115 by Con Kolivas.\n");
}
/*
@@ -179,28 +179,9 @@ static inline int timeslice(void)
return MS_TO_US(rr_interval);
}
-/*
- * The global runqueue data that all CPUs work off. Contains either atomic
- * variables and a cpu bitmap set atomically.
- */
-struct global_rq {
#ifdef CONFIG_SMP
- atomic_t nr_running ____cacheline_aligned_in_smp;
- atomic_t nr_uninterruptible ____cacheline_aligned_in_smp;
- atomic64_t nr_switches ____cacheline_aligned_in_smp;
- atomic_t qnr ____cacheline_aligned_in_smp; /* queued not running */
-#else
- atomic_t nr_running ____cacheline_aligned;
- atomic_t nr_uninterruptible ____cacheline_aligned;
- atomic64_t nr_switches ____cacheline_aligned;
- atomic_t qnr ____cacheline_aligned; /* queued not running */
-#endif
-#ifdef CONFIG_SMP
- cpumask_t cpu_idle_map;
-#endif
-};
+static cpumask_t cpu_idle_map ____cacheline_aligned_in_smp;
-#ifdef CONFIG_SMP
/*
* We add the notion of a root-domain which will be used to define per-domain
* variables. Each exclusive cpuset essentially defines an island domain by
@@ -232,13 +213,6 @@ static struct root_domain def_root_domain;
#endif /* CONFIG_SMP */
-/* There can be only one */
-#ifdef CONFIG_SMP
-static struct global_rq grq ____cacheline_aligned_in_smp;
-#else
-static struct global_rq grq ____cacheline_aligned;
-#endif
-
static DEFINE_MUTEX(sched_hotcpu_mutex);
/* cpus with isolated domains */
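
The counters that lived in the deleted global_rq are not simply dropped: they reappear as plain per-runqueue fields (see the MuQSS.h hunk below). A minimal sketch of the shift, with simplified declarations rather than the real structs, assuming only what the patch itself states, namely that each counter is now touched exclusively under its owning runqueue's lock:

#include <linux/atomic.h>
#include <linux/types.h>

/* Old scheme: one set of globally shared atomics, bounced between CPUs. */
struct old_global_counters {
	atomic_t	nr_running;
	atomic_t	nr_uninterruptible;
	atomic64_t	nr_switches;
	atomic_t	qnr;		/* queued but not running; dropped entirely */
};

/* New scheme: ordinary fields in each struct rq, modified only with
 * rq->lock held, so no atomic operations are needed at all. */
struct new_per_rq_counters {
	int	nr_running;
	long	nr_uninterruptible;
	s64	nr_switches;
};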
@@ -710,6 +684,12 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
next->on_cpu = 1;
}
+static inline void smp_sched_reschedule(int cpu)
+{
+ if (likely(cpu_online(cpu)))
+ smp_send_reschedule(cpu);
+}
+
/*
* resched_task - mark a task 'to be rescheduled now'.
*
@@ -736,7 +716,7 @@ void resched_task(struct task_struct *p)
}
if (set_nr_and_not_polling(p))
- smp_send_reschedule(cpu);
+ smp_sched_reschedule(cpu);
else
trace_sched_wake_idle_without_ipi(cpu);
}
@@ -788,6 +768,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
*/
if (unlikely(task_on_rq_migrating(prev))) {
sched_info_dequeued(rq, prev);
+ rq->nr_running--;
/*
* We move the ownership of prev to the new cpu now. ttwu can't
* activate prev to the wrong cpu since it has to grab this
@@ -798,6 +779,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
raw_spin_lock(&prev->pi_lock);
rq = __task_rq_lock(prev);
+ rq->nr_running++;
/* Check that someone else hasn't already queued prev */
if (likely(!task_queued(prev))) {
enqueue_task(rq, prev, 0);
@@ -852,7 +834,7 @@ static inline int ms_longest_deadline_diff(void)
static inline int rq_load(struct rq *rq)
{
- return rq->sl->entries + !rq_idle(rq);
+ return rq->nr_running;
}
static inline bool rq_local(struct rq *rq);
@@ -999,26 +981,6 @@ static inline int task_timeslice(struct task_struct *p)
return (rr_interval * task_prio_ratio(p) / 128);
}
-/*
- * qnr is the "queued but not running" count which is the total number of
- * tasks on the global runqueue list waiting for cpu time but not actually
- * currently running on a cpu.
- */
-static inline void inc_qnr(void)
-{
- atomic_inc(&grq.qnr);
-}
-
-static inline void dec_qnr(void)
-{
- atomic_dec(&grq.qnr);
-}
-
-static inline int queued_notrunning(void)
-{
- return atomic_read(&grq.qnr);
-}
-
#ifdef CONFIG_SMP
/* Entered with rq locked */
static inline void resched_if_idle(struct rq *rq)
@@ -1123,7 +1085,7 @@ static inline void atomic_set_cpu(int cpu, cpumask_t *cpumask)
static inline void set_cpuidle_map(int cpu)
{
if (likely(cpu_online(cpu)))
- atomic_set_cpu(cpu, &grq.cpu_idle_map);
+ atomic_set_cpu(cpu, &cpu_idle_map);
}
static inline void atomic_clear_cpu(int cpu, cpumask_t *cpumask)
@@ -1133,12 +1095,12 @@ static inline void atomic_clear_cpu(int cpu, cpumask_t *cpumask)
static inline void clear_cpuidle_map(int cpu)
{
- atomic_clear_cpu(cpu, &grq.cpu_idle_map);
+ atomic_clear_cpu(cpu, &cpu_idle_map);
}
static bool suitable_idle_cpus(struct task_struct *p)
{
- return (cpumask_intersects(&p->cpus_allowed, &grq.cpu_idle_map));
+ return (cpumask_intersects(&p->cpus_allowed, &cpu_idle_map));
}
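
suitable_idle_cpus() and the helpers above now test bits in the file-scope cpu_idle_map rather than a field of grq. A hedged sketch of how a wake-up path can use that map to find a CPU for a task, simplified from resched_best_idle() further down (the function name here is illustrative, not from the patch):

static int sketch_pick_idle_cpu(struct task_struct *p)
{
	cpumask_t tmp;

	if (!cpumask_and(&tmp, &p->cpus_allowed, &cpu_idle_map))
		return -1;		/* no CPU the task may use is idle */
	return cpumask_first(&tmp);	/* the real code prefers SMT/cache-local CPUs */
}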
/*
@@ -1165,7 +1127,7 @@ static void resched_curr(struct rq *rq)
}
if (set_nr_and_not_polling(rq->curr))
- smp_send_reschedule(cpu);
+ smp_sched_reschedule(cpu);
else
trace_sched_wake_idle_without_ipi(cpu);
}
@@ -1260,7 +1222,7 @@ static inline void resched_idle(struct rq *rq)
return;
}
- smp_send_reschedule(rq->cpu);
+ smp_sched_reschedule(rq->cpu);
}
static struct rq *resched_best_idle(struct task_struct *p, int cpu)
@@ -1269,7 +1231,7 @@ static struct rq *resched_best_idle(struct task_struct *p, int cpu)
struct rq *rq;
int best_cpu;
- cpumask_and(&tmpmask, &p->cpus_allowed, &grq.cpu_idle_map);
+ cpumask_and(&tmpmask, &p->cpus_allowed, &cpu_idle_map);
best_cpu = best_mask_cpu(cpu, task_rq(p), &tmpmask);
rq = cpu_rq(best_cpu);
if (!smt_schedule(p, rq))
@@ -1381,12 +1343,11 @@ static void activate_task(struct task_struct *p, struct rq *rq)
p->prio = effective_prio(p);
if (task_contributes_to_load(p))
- atomic_dec(&grq.nr_uninterruptible);
+ rq->nr_uninterruptible--;
enqueue_task(rq, p, 0);
p->on_rq = TASK_ON_RQ_QUEUED;
- atomic_inc(&grq.nr_running);
- inc_qnr();
+ rq->nr_running++;
}
/*
@@ -1396,10 +1357,10 @@ static void activate_task(struct task_struct *p, struct rq *rq)
static inline void deactivate_task(struct task_struct *p, struct rq *rq)
{
if (task_contributes_to_load(p))
- atomic_inc(&grq.nr_uninterruptible);
+ rq->nr_uninterruptible++;
p->on_rq = 0;
- atomic_dec(&grq.nr_running);
+ rq->nr_running--;
sched_info_dequeued(rq, p);
}
@@ -1467,11 +1428,12 @@ static inline void take_task(struct rq *rq, int cpu, struct task_struct *p)
dequeue_task(p_rq, p, DEQUEUE_SAVE);
if (p_rq != rq) {
+ p_rq->nr_running--;
sched_info_dequeued(p_rq, p);
+ rq->nr_running++;
sched_info_queued(rq, p);
}
set_task_cpu(p, cpu);
- dec_qnr();
}
/*
@@ -1484,7 +1446,6 @@ static inline void return_task(struct task_struct *p, struct rq *rq,
if (deactivate)
deactivate_task(p, rq);
else {
- inc_qnr();
#ifdef CONFIG_SMP
/*
* set_task_cpu was called on the running task that doesn't
@@ -1641,7 +1602,7 @@ void kick_process(struct task_struct *p)
preempt_disable();
cpu = task_cpu(p);
if ((cpu != smp_processor_id()) && task_curr(p))
- smp_send_reschedule(cpu);
+ smp_sched_reschedule(cpu);
preempt_enable();
}
EXPORT_SYMBOL_GPL(kick_process);
@@ -1806,7 +1767,7 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
#ifdef CONFIG_SMP
if (p->sched_contributes_to_load)
- atomic_dec(&grq.nr_uninterruptible);
+ rq->nr_uninterruptible--;
#endif
ttwu_activate(rq, p);
@@ -1897,7 +1858,7 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags)
if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) {
if (!set_nr_if_polling(rq->idle))
- smp_send_reschedule(cpu);
+ smp_sched_reschedule(cpu);
else
trace_sched_wake_idle_without_ipi(cpu);
}
@@ -1918,7 +1879,7 @@ void wake_up_if_idle(int cpu)
} else {
rq_lock_irqsave(rq, &flags);
if (likely(is_idle_task(rq->curr)))
- smp_send_reschedule(cpu);
+ smp_sched_reschedule(cpu);
/* Else cpu is not in idle, do nothing here */
rq_unlock_irqrestore(rq, &flags);
}
@@ -1980,7 +1941,7 @@ static inline int select_best_cpu(struct task_struct *p)
rq = other_rq;
}
if (unlikely(!rq))
- return smp_processor_id();
+ return task_cpu(p);
return rq->cpu;
}
#else /* CONFIG_SMP */
@@ -2690,22 +2651,6 @@ context_switch(struct rq *rq, struct task_struct *prev,
}
/*
- * nr_running, nr_uninterruptible and nr_context_switches:
- *
- * externally visible scheduler statistics: current number of runnable
- * threads, total number of context switches performed since bootup.
- */
-unsigned long nr_running(void)
-{
- return atomic_read(&grq.nr_running);
-}
-
-static unsigned long nr_uninterruptible(void)
-{
- return atomic_read(&grq.nr_uninterruptible);
-}
-
-/*
* Check if only the current task is running on the cpu.
*
* Caution: this function does not check that the caller has disabled
@@ -2729,9 +2674,31 @@ bool single_task_running(void)
}
EXPORT_SYMBOL(single_task_running);
+/*
+ * nr_running, nr_uninterruptible and nr_context_switches:
+ *
+ * externally visible scheduler statistics: current number of runnable
+ * threads, total number of context switches performed since bootup.
+ */
unsigned long long nr_context_switches(void)
{
- return (unsigned long long)atomic64_read(&grq.nr_switches);
+ long long sum = 0;
+ int i;
+
+ for_each_possible_cpu(i)
+ sum += cpu_rq(i)->nr_switches;
+
+ return sum;
+}
+
+unsigned long nr_running(void)
+{
+ long i, sum = 0;
+
+ for_each_online_cpu(i)
+ sum += cpu_rq(i)->nr_running;
+
+ return sum;
}
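
With the global atomics gone, nr_running() and nr_context_switches() are computed by summing per-CPU counters without taking any runqueue locks, so readers get a slightly stale but good-enough snapshot. A standalone userspace analogue of that trade-off (a sketch only, ordinary pthreads C, not kernel code; in the kernel the writers are serialised by rq->lock, whereas this demo uses relaxed atomics because it has no lock):

#include <pthread.h>
#include <stdio.h>

#define NTHREADS 4

static long counter[NTHREADS];		/* one counter per "CPU" */

static void *worker(void *arg)
{
	long id = (long)arg;

	for (int i = 0; i < 1000000; i++)
		__atomic_fetch_add(&counter[id], 1, __ATOMIC_RELAXED);
	return NULL;
}

static long read_total(void)		/* lock-free, approximate while writers run */
{
	long sum = 0;

	for (int i = 0; i < NTHREADS; i++)
		sum += __atomic_load_n(&counter[i], __ATOMIC_RELAXED);
	return sum;
}

int main(void)
{
	pthread_t tid[NTHREADS];

	for (long i = 0; i < NTHREADS; i++)
		pthread_create(&tid[i], NULL, worker, (void *)i);
	printf("in-flight total: %ld\n", read_total());	/* may lag behind the writers */
	for (int i = 0; i < NTHREADS; i++)
		pthread_join(tid[i], NULL);
	printf("final total:     %ld\n", read_total());
	return 0;
}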
unsigned long nr_iowait(void)
@@ -2752,7 +2719,14 @@ unsigned long nr_iowait_cpu(int cpu)
unsigned long nr_active(void)
{
- return nr_running() + nr_uninterruptible();
+ long i, sum = 0;
+
+ for_each_online_cpu(i) {
+ sum += cpu_rq(i)->nr_running;
+ sum += cpu_rq(i)->nr_uninterruptible;
+ }
+
+ return sum;
}
/*
@@ -3662,8 +3636,6 @@ static inline void check_deadline(struct task_struct *p, struct rq *rq)
time_slice_expired(p, rq);
}
-#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
-
/*
* Task selection with skiplists is a simple matter of picking off the first
* task in the sorted list, an O(1) operation. The lookup is amortised O(1)
@@ -3680,8 +3652,9 @@ static inline void check_deadline(struct task_struct *p, struct rq *rq)
* runqueue or a runqueue with more tasks than the current one with a better
* key/deadline.
*/
-static inline struct
-task_struct *earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle)
+#ifdef CONFIG_SMP
+static inline struct task_struct
+*earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle)
{
struct task_struct *edt = idle;
struct rq *locked = NULL;
@@ -3759,6 +3732,19 @@ task_struct *earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *
return edt;
}
+#else /* CONFIG_SMP */
+static inline struct task_struct
+*earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle)
+{
+ struct task_struct *edt;
+
+ if (unlikely(!rq->sl->entries))
+ return idle;
+ edt = rq->node.next[0]->value;
+ take_task(rq, cpu, edt);
+ return edt;
+}
+#endif /* CONFIG_SMP */
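
The new uniprocessor variant shows the core idea in isolation: the skip list is kept sorted by virtual deadline, so the lowest-deadline task is always the first node on the bottom level. A simplified, hypothetical node layout (not the kernel's skiplist_node) just to make that O(1) lookup concrete:

struct sl_node {
	unsigned long long key;		/* virtual deadline */
	void *value;			/* the task_struct, in the scheduler's case */
	struct sl_node *next[16];	/* next[0] is the fully sorted bottom level */
};

struct sl_list {
	int entries;
	struct sl_node header;		/* sentinel; header.next[0] holds the minimum key */
};

static inline void *sl_peek_min(struct sl_list *l)
{
	return l->entries ? l->header.next[0]->value : NULL;
}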
/*
* Print scheduling while atomic bug:
@@ -3840,13 +3826,9 @@ static void check_smt_siblings(struct rq *this_rq)
rq = cpu_rq(other_cpu);
if (rq_idle(rq))
continue;
- if (unlikely(!rq->online))
- continue;
p = rq->curr;
- if (!smt_schedule(p, this_rq)) {
- set_tsk_need_resched(p);
- smp_send_reschedule(other_cpu);
- }
+ if (!smt_schedule(p, this_rq))
+ resched_curr(rq);
}
}
@@ -3854,21 +3836,12 @@ static void wake_smt_siblings(struct rq *this_rq)
{
int other_cpu;
- if (!queued_notrunning())
- return;
-
for_each_cpu(other_cpu, &this_rq->thread_mask) {
struct rq *rq;
rq = cpu_rq(other_cpu);
- if (unlikely(!rq->online))
- continue;
- if (rq_idle(rq)) {
- struct task_struct *p = rq->curr;
-
- set_tsk_need_resched(p);
- smp_send_reschedule(other_cpu);
- }
+ if (rq_idle(rq))
+ resched_idle(rq);
}
}
#else
@@ -4020,23 +3993,16 @@ static void __sched notrace __schedule(bool preempt)
return_task(prev, rq, cpu, deactivate);
}
- if (unlikely(!queued_notrunning())) {
- next = idle;
- schedstat_inc(rq, sched_goidle);
+ next = earliest_deadline_task(rq, cpu, idle);
+ if (likely(next->prio != PRIO_LIMIT)) {
+ clear_cpuidle_map(cpu);
+ next->last_ran = niffies;
+ } else {
set_cpuidle_map(cpu);
update_load_avg(rq);
- } else {
- next = earliest_deadline_task(rq, cpu, idle);
- if (likely(next->prio != PRIO_LIMIT))
- clear_cpuidle_map(cpu);
- else {
- set_cpuidle_map(cpu);
- update_load_avg(rq);
- }
}
set_rq_task(rq, next);
- next->last_ran = niffies;
if (likely(prev != next)) {
/*
@@ -4048,16 +4014,14 @@ static void __sched notrace __schedule(bool preempt)
check_siblings(rq);
else
wake_siblings(rq);
- atomic64_inc(&grq.nr_switches);
+ rq->nr_switches++;
rq->curr = next;
++*switch_count;
trace_sched_switch(preempt, prev, next);
rq = context_switch(rq, prev, next); /* unlocks the rq */
- } else {
- check_siblings(rq);
+ } else
rq_unlock_irq(rq);
- }
}
static inline void sched_submit_work(struct task_struct *tsk)
@@ -5618,7 +5582,7 @@ void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_ma
p->nr_cpus_allowed = cpumask_weight(new_mask);
}
-void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+static void __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
struct rq *rq = task_rq(p);
@@ -5633,6 +5597,29 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
*/
lockdep_assert_held(&rq->lock);
}
+}
+
+/*
+ * Calling do_set_cpus_allowed from outside the scheduler code may make the
+ * task not be able to run on its current CPU so we resched it here.
+ */
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+{
+ __do_set_cpus_allowed(p, new_mask);
+ if (needs_other_cpu(p, task_cpu(p))) {
+ set_task_cpu(p, valid_task_cpu(p));
+ resched_task(p);
+ }
+}
+
+/*
+ * For internal scheduler calls to do_set_cpus_allowed which will resched
+ * themselves if needed.
+ */
+static void _do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+{
+ __do_set_cpus_allowed(p, new_mask);
+ /* __set_cpus_allowed_ptr will handle the reschedule in this variant */
if (needs_other_cpu(p, task_cpu(p)))
set_task_cpu(p, valid_task_cpu(p));
}
@@ -5830,7 +5817,7 @@ void wake_up_idle_cpu(int cpu)
return;
if (set_nr_and_not_polling(cpu_rq(cpu)->idle))
- smp_send_reschedule(cpu);
+ smp_sched_reschedule(cpu);
else
trace_sched_wake_idle_without_ipi(cpu);
}
@@ -5890,7 +5877,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
queued = task_queued(p);
- do_set_cpus_allowed(p, new_mask);
+ _do_set_cpus_allowed(p, new_mask);
if (p->flags & PF_KTHREAD) {
/*
@@ -7476,7 +7463,7 @@ static const cpumask_t *thread_cpumask(int cpu)
/* All this CPU's SMT siblings are idle */
static bool siblings_cpu_idle(struct rq *rq)
{
- return cpumask_subset(&rq->thread_mask, &grq.cpu_idle_map);
+ return cpumask_subset(&rq->thread_mask, &cpu_idle_map);
}
#endif
#ifdef CONFIG_SCHED_MC
@@ -7487,7 +7474,7 @@ static const cpumask_t *core_cpumask(int cpu)
/* All this CPU's shared cache siblings are idle */
static bool cache_cpu_idle(struct rq *rq)
{
- return cpumask_subset(&rq->core_mask, &grq.cpu_idle_map);
+ return cpumask_subset(&rq->core_mask, &cpu_idle_map);
}
#endif
@@ -7668,15 +7655,11 @@ void __init sched_init(void)
for (i = 1 ; i < NICE_WIDTH ; i++)
prio_ratios[i] = prio_ratios[i - 1] * 11 / 10;
- atomic_set(&grq.nr_running, 0);
- atomic_set(&grq.nr_uninterruptible, 0);
- atomic64_set(&grq.nr_switches, 0);
skiplist_node_init(&init_task.node);
#ifdef CONFIG_SMP
init_defrootdomain();
- atomic_set(&grq.qnr, 0);
- cpumask_clear(&grq.cpu_idle_map);
+ cpumask_clear(&cpu_idle_map);
#else
uprq = &per_cpu(runqueues, 0);
#endif
@@ -7690,6 +7673,7 @@ void __init sched_init(void)
#endif /* CONFIG_CGROUP_SCHED */
for_each_possible_cpu(i) {
rq = cpu_rq(i);
+ rq->nr_running = rq->nr_uninterruptible = rq->nr_switches = 0;
skiplist_init(&rq->node);
rq->sl = new_skiplist(&rq->node);
raw_spin_lock_init(&rq->lock);
diff --git a/kernel/sched/MuQSS.h b/kernel/sched/MuQSS.h
index 4e3115dac..10a12b335 100644
--- a/kernel/sched/MuQSS.h
+++ b/kernel/sched/MuQSS.h
@@ -17,6 +17,9 @@
struct rq {
struct task_struct *curr, *idle, *stop;
struct mm_struct *prev_mm;
+ long nr_uninterruptible;
+ s64 nr_switches;
+ int nr_running;
raw_spinlock_t lock;
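
These three fields replace the grq atomics. They are deliberately plain, non-atomic types: every writer in the MuQSS.c hunks above already holds rq->lock. A small sketch of an accessor that would document that rule (not part of this patch, which open-codes rq->nr_running++ at the call sites):

static inline void rq_inc_nr_running(struct rq *rq)
{
	lockdep_assert_held(&rq->lock);		/* writers must own the runqueue lock */
	rq->nr_running++;
}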
diff --git a/kernel/skip_list.c b/kernel/skip_list.c
index 5c66067f2..d52508056 100644
--- a/kernel/skip_list.c
+++ b/kernel/skip_list.c
@@ -93,34 +93,9 @@ void skiplist_node_init(skiplist_node *node)
memset(node, 0, sizeof(skiplist_node));
}
-/*
- * Returns a pseudo-random number based on the randseed value by masking out
- * 0-15. As many levels are not required when only few values are on the list,
- * we limit the height of the levels according to how many list entries there
- * are in a cheap manner. The height of the levels may have been higher while
- * there were more entries queued previously but as this code is used only by
- * the scheduler, entries are short lived and will be torn down regularly.
- *
- * 00-03 entries - 1 level
- * 04-07 entries - 2 levels
- * 08-15 entries - 4 levels
- * 15-31 entries - 7 levels
- * 32+ entries - max(16) levels
- */
-static inline unsigned int randomLevel(int entries, unsigned int randseed)
+static inline unsigned int randomLevel(const long unsigned int randseed)
{
- unsigned int mask;
-
- if (entries > 15)
- mask = 0x7;
- else if (entries > 7)
- mask = 0x3;
- else if (entries > 3)
- mask = 0x1;
- else
- return 0;
-
- return randseed & mask;
+ return find_first_bit(&randseed, MaxLevel);
}
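
The replacement randomLevel() relies on a property of find_first_bit(): on a uniformly random word, bit position k is the first set bit with probability 2^-(k+1), which is the geometric level distribution a skip list wants, independent of how many entries are queued. A standalone userspace sketch of that distribution, using __builtin_ctzl() as a stand-in for find_first_bit() (illustration only, not kernel code):

#include <stdio.h>
#include <stdlib.h>

#define MAX_LEVEL 16

static unsigned int random_level(unsigned long randseed)
{
	unsigned int k;

	if (!randseed)
		return MAX_LEVEL;	/* find_first_bit() returns the size when no bit is set */
	k = (unsigned int)__builtin_ctzl(randseed);
	return k < MAX_LEVEL ? k : MAX_LEVEL;
}

int main(void)
{
	long counts[MAX_LEVEL + 1] = { 0 };
	long samples = 1000000;

	srandom(1);
	for (long i = 0; i < samples; i++)
		counts[random_level((unsigned long)random())]++;

	/* Expect roughly 50%, 25%, 12.5%, ... of samples at levels 0, 1, 2, ... */
	for (int k = 0; k < 5; k++)
		printf("level %d: %5.2f%%\n", k, 100.0 * counts[k] / samples);
	return 0;
}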
void skiplist_insert(skiplist *l, skiplist_node *node, keyType key, valueType value, unsigned int randseed)
@@ -136,9 +111,8 @@ void skiplist_insert(skiplist *l, skiplist_node *node, keyType key, valueType va
update[k] = p;
} while (--k >= 0);
- k = randomLevel(++l->entries, randseed);
- if (k > MaxLevel)
- k = MaxLevel;
+ ++l->entries;
+ k = randomLevel(randseed);
if (k > l->level) {
k = ++l->level;
update[k] = l->header;