Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Kconfig.hz      15
-rw-r--r--  kernel/bpf/hashtab.c    3
-rw-r--r--  kernel/sched/MuQSS.c  228
3 files changed, 131 insertions, 115 deletions
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index ecde22d15..1532e4e14 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,8 +4,8 @@
choice
prompt "Timer frequency"
- default HZ_250
- default HZ_100 if SCHED_MUQSS
+ default HZ_128 if SCHED_MUQSS
+ default HZ_250 if !SCHED_MUQSS
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -24,6 +24,16 @@ choice
with lots of processors that may show reduced performance if
too many timer interrupts are occurring.
+ config HZ_128
+ bool "128 HZ"
+ help
+ 128 Hz is a suitable choice in combination with MuQSS, which does
+ not rely on ticks for rescheduling interrupts and is not HZ-limited
+ for timeouts and sleeps from either the kernel or userspace.
+ This allows us to benefit from the lower overhead and higher
+ throughput of fewer timer ticks, plus the micro-optimisation of
+ divisions by HZ being divisions by a power of 2.
+
config HZ_250
bool "250 HZ"
help
@@ -51,6 +61,7 @@ endchoice
config HZ
int
default 100 if HZ_100
+ default 128 if HZ_128
default 250 if HZ_250
default 300 if HZ_300
default 1000 if HZ_1000
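
The "power of 2" remark in the new help text refers to divisions by HZ reducing to shifts. A minimal standalone sketch of the effect, with invented names (this is not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    #define SKETCH_HZ 128ULL   /* stand-in for CONFIG_HZ, illustration only */

    /* Seconds from a tick count: with SKETCH_HZ a power of two the compiler
     * lowers the division to a shift (x / 128 == x >> 7), so hot timekeeping
     * paths avoid a real divide instruction. */
    static uint64_t ticks_to_secs(uint64_t ticks)
    {
            return ticks / SKETCH_HZ;
    }

    int main(void)
    {
            printf("%llu\n", (unsigned long long)ticks_to_secs(1280)); /* prints 10 */
            return 0;
    }
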
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 570eeca7b..ad1bc67af 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -687,7 +687,8 @@ static void delete_all_elements(struct bpf_htab *htab)
hlist_for_each_entry_safe(l, n, head, hash_node) {
hlist_del_rcu(&l->hash_node);
- htab_elem_free(htab, l);
+ if (l->state != HTAB_EXTRA_ELEM_USED)
+ htab_elem_free(htab, l);
}
}
}
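
The hashtab.c hunk stops calling htab_elem_free() on elements whose state marks them as in-use "extra" elements. Those come from a reserve the map preallocates and releases as a whole, so freeing them individually during map destruction would hit memory that was never allocated element-by-element. A rough userspace analogue of the ownership rule, with invented names:

    #include <stdbool.h>
    #include <stdlib.h>

    /* Sketch only: an element is either heap-allocated or lives inside a
     * preallocated pool owned by the table itself. */
    struct elem {
            bool from_pool;         /* plays the role of HTAB_EXTRA_ELEM_USED */
            struct elem *next;
    };

    static void delete_all(struct elem *head)
    {
            while (head) {
                    struct elem *next = head->next;
                    /* Pool-owned elements are released with the pool, not
                     * here; freeing them individually would corrupt the
                     * allocator. */
                    if (!head->from_pool)
                            free(head);
                    head = next;
            }
    }
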
diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c
index e256bd60a..ba45cf37b 100644
--- a/kernel/sched/MuQSS.c
+++ b/kernel/sched/MuQSS.c
@@ -137,7 +137,7 @@
void print_scheduler_version(void)
{
- printk(KERN_INFO "MuQSS CPU scheduler v0.140 by Con Kolivas.\n");
+ printk(KERN_INFO "MuQSS CPU scheduler v0.144 by Con Kolivas.\n");
}
/*
@@ -2357,6 +2357,76 @@ static void account_task_cpu(struct rq *rq, struct task_struct *p)
p->last_ran = rq->niffies;
}
+static inline int hrexpiry_enabled(struct rq *rq)
+{
+ if (unlikely(!cpu_active(cpu_of(rq)) || !sched_smp_initialized))
+ return 0;
+ return hrtimer_is_hres_active(&rq->hrexpiry_timer);
+}
+
+/*
+ * Use HR-timers to deliver accurate preemption points.
+ */
+static inline void hrexpiry_clear(struct rq *rq)
+{
+ if (!hrexpiry_enabled(rq))
+ return;
+ if (hrtimer_active(&rq->hrexpiry_timer))
+ hrtimer_cancel(&rq->hrexpiry_timer);
+}
+
+/*
+ * High-resolution time_slice expiry.
+ * Runs from hardirq context with interrupts disabled.
+ */
+static enum hrtimer_restart hrexpiry(struct hrtimer *timer)
+{
+ struct rq *rq = container_of(timer, struct rq, hrexpiry_timer);
+ struct task_struct *p;
+
+ /* This can happen during CPU hotplug / resume */
+ if (unlikely(cpu_of(rq) != smp_processor_id()))
+ goto out;
+
+ /*
+ * We're doing this without the runqueue lock but this should always
+ * be run on the local CPU. Time slice should run out in __schedule
+ * but we set it to zero here in case niffies is slightly less.
+ */
+ p = rq->curr;
+ p->time_slice = 0;
+ __set_tsk_resched(p);
+out:
+ return HRTIMER_NORESTART;
+}
+
+/*
+ * Called to set the hrexpiry timer state.
+ *
+ * called with irqs disabled from the local CPU only
+ */
+static void hrexpiry_start(struct rq *rq, u64 delay)
+{
+ if (!hrexpiry_enabled(rq))
+ return;
+
+ hrtimer_start(&rq->hrexpiry_timer, ns_to_ktime(delay),
+ HRTIMER_MODE_REL_PINNED);
+}
+
+static void init_rq_hrexpiry(struct rq *rq)
+{
+ hrtimer_init(&rq->hrexpiry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ rq->hrexpiry_timer.function = hrexpiry;
+}
+
+static inline int rq_dither(struct rq *rq)
+{
+ if (!hrexpiry_enabled(rq))
+ return HALF_JIFFY_US;
+ return 0;
+}
+
/*
* wake_up_new_task - wake up a newly created task for the first time.
*
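
The block added above arms a pinned one-shot hrtimer for the remaining timeslice of the task being run, so expiry no longer has to wait for the next scheduler tick. For readers unfamiliar with the pattern, here is a minimal userspace analogue using POSIX timers; it is illustrative only and uses none of the kernel's APIs (link with -lrt on older glibc):

    #include <signal.h>
    #include <stdio.h>
    #include <time.h>
    #include <unistd.h>

    /* Fires once when the "timeslice" runs out, much as hrexpiry() marks
     * the current task for rescheduling. */
    static void slice_expired(union sigval sv)
    {
            (void)sv;
            printf("timeslice expired, would request a reschedule here\n");
    }

    int main(void)
    {
            struct sigevent sev = { 0 };
            struct itimerspec its = { 0 };
            timer_t tid;

            sev.sigev_notify = SIGEV_THREAD;
            sev.sigev_notify_function = slice_expired;
            timer_create(CLOCK_MONOTONIC, &sev, &tid);

            /* One-shot relative expiry, analogous to
             * hrtimer_start(..., HRTIMER_MODE_REL_PINNED). */
            its.it_value.tv_nsec = 6 * 1000 * 1000;     /* 6 ms "timeslice" */
            timer_settime(tid, 0, &its, NULL);

            sleep(1);
            return 0;
    }
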
@@ -2425,8 +2495,14 @@ void wake_up_new_task(struct task_struct *p)
* usually avoids a lot of COW overhead.
*/
__set_tsk_resched(rq_curr);
- } else
+ } else {
+ /*
+ * Adjust the hrexpiry since rq_curr will keep
+ * running and its timeslice has been shortened.
+ */
+ hrexpiry_start(rq, US_TO_NS(rq_curr->time_slice));
try_preempt(p, new_rq);
+ }
}
} else {
time_slice_expired(p, new_rq);
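
The wake_up_new_task() change re-arms the expiry timer because the parent keeps running on a slice that has just been shortened: in the BFS/MuQSS lineage a forking parent shares its remaining timeslice with the child rather than letting fork() mint new CPU time. A rough sketch of that split, with invented names and units (not the exact MuQSS code):

    #include <stdint.h>

    struct task_sketch {
            uint64_t time_slice_us;     /* remaining timeslice, microseconds */
    };

    /* Assumed BFS/MuQSS-style split: the child takes half of the parent's
     * remaining timeslice, so the parent's slice shrinks accordingly. */
    static uint64_t share_timeslice(struct task_sketch *parent,
                                    struct task_sketch *child)
    {
            child->time_slice_us = parent->time_slice_us / 2;
            parent->time_slice_us -= child->time_slice_us;

            /* The caller then re-arms the parent's expiry timer for this
             * shortened value, as the hunk above does with hrexpiry_start(). */
            return parent->time_slice_us;
    }
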
@@ -3108,87 +3184,6 @@ unsigned long long task_sched_runtime(struct task_struct *p)
return ns;
}
-#ifdef CONFIG_HIGH_RES_TIMERS
-static inline int hrexpiry_enabled(struct rq *rq)
-{
- if (unlikely(!cpu_active(cpu_of(rq)) || !sched_smp_initialized))
- return 0;
- return hrtimer_is_hres_active(&rq->hrexpiry_timer);
-}
-
-/*
- * Use HR-timers to deliver accurate preemption points.
- */
-static void hrexpiry_clear(struct rq *rq)
-{
- if (!hrexpiry_enabled(rq))
- return;
- if (hrtimer_active(&rq->hrexpiry_timer))
- hrtimer_cancel(&rq->hrexpiry_timer);
-}
-
-/*
- * High-resolution time_slice expiry.
- * Runs from hardirq context with interrupts disabled.
- */
-static enum hrtimer_restart hrexpiry(struct hrtimer *timer)
-{
- struct rq *rq = container_of(timer, struct rq, hrexpiry_timer);
- struct task_struct *p;
-
- /* This can happen during CPU hotplug / resume */
- if (unlikely(cpu_of(rq) != smp_processor_id()))
- goto out;
-
- /*
- * We're doing this without the runqueue lock but this should always
- * be run on the local CPU. Time slice should run out in __schedule
- * but we set it to zero here in case niffies is slightly less.
- */
- p = rq->curr;
- p->time_slice = 0;
- __set_tsk_resched(p);
-out:
- return HRTIMER_NORESTART;
-}
-
-/*
- * Called to set the hrexpiry timer state.
- *
- * called with irqs disabled from the local CPU only
- */
-static void hrexpiry_start(struct rq *rq, u64 delay)
-{
- if (!hrexpiry_enabled(rq))
- return;
-
- hrtimer_start(&rq->hrexpiry_timer, ns_to_ktime(delay),
- HRTIMER_MODE_REL_PINNED);
-}
-
-static void init_rq_hrexpiry(struct rq *rq)
-{
- hrtimer_init(&rq->hrexpiry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- rq->hrexpiry_timer.function = hrexpiry;
-}
-
-static inline int rq_dither(struct rq *rq)
-{
- if (!hrexpiry_enabled(rq))
- return HALF_JIFFY_US;
- return 0;
-}
-#else /* CONFIG_HIGH_RES_TIMERS */
-static inline void init_rq_hrexpiry(struct rq *rq)
-{
-}
-
-static inline int rq_dither(struct rq *rq)
-{
- return HALF_JIFFY_US;
-}
-#endif /* CONFIG_HIGH_RES_TIMERS */
-
/*
* Functions to test for when SCHED_ISO tasks have used their allocated
* quota as real time scheduling and convert them back to SCHED_NORMAL. All
@@ -3491,16 +3486,15 @@ static inline void check_deadline(struct task_struct *p, struct rq *rq)
static inline struct task_struct
*earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle)
{
+ struct rq *locked = NULL, *chosen = NULL;
struct task_struct *edt = idle;
- struct rq *locked = NULL;
int i, best_entries = 0;
u64 best_key = ~0ULL;
for (i = 0; i < num_possible_cpus(); i++) {
struct rq *other_rq = rq_order(rq, i);
int entries = other_rq->sl->entries;
- struct task_struct *p;
- u64 key;
+ skiplist_node *next;
/*
* Check for queued entres lockless first. The local runqueue
@@ -3534,35 +3528,47 @@ static inline struct task_struct
continue;
}
}
- key = other_rq->node.next[0]->key;
- /* Reevaluate key after locking */
- if (unlikely(key >= best_key)) {
- /* This will always be when rq != other_rq */
- unlock_rq(other_rq);
- continue;
- }
- p = other_rq->node.next[0]->value;
- if (!smt_schedule(p, rq)) {
- if (i)
- unlock_rq(other_rq);
- continue;
- }
+ next = &other_rq->node;
+ /*
+ * In interactive mode we check beyond the best entry on other
+ * runqueues if we can't get the best for smt or affinity
+ * reasons.
+ */
+ while ((next = next->next[0]) != &other_rq->node) {
+ struct task_struct *p;
+ u64 key = next->key;
- /* Make sure affinity is ok */
- if (i) {
- if (needs_other_cpu(p, cpu)) {
- unlock_rq(other_rq);
+ /* Reevaluate key after locking */
+ if (key >= best_key)
+ break;
+
+ p = next->value;
+ if (!smt_schedule(p, rq)) {
+ if (i && !sched_interactive)
+ break;
continue;
}
- if (locked)
- unlock_rq(locked);
- locked = other_rq;
- }
- best_entries = entries;
- best_key = key;
- edt = p;
+ /* Make sure affinity is ok */
+ if (i) {
+ if (needs_other_cpu(p, cpu)) {
+ if (sched_interactive)
+ continue;
+ break;
+ }
+ /* From this point on p is the best so far */
+ if (locked)
+ unlock_rq(locked);
+ chosen = locked = other_rq;
+ }
+ best_entries = entries;
+ best_key = key;
+ edt = p;
+ break;
+ }
+ if (i && other_rq != chosen)
+ unlock_rq(other_rq);
}
if (likely(edt != idle))
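
With this rewrite the lookup no longer gives up on a remote runqueue when its best (lowest-key) task fails the SMT or affinity checks: in interactive mode it keeps walking that queue's skiplist and stops only once the keys can no longer beat the best deadline found so far. A standalone sketch of that scan-with-early-cutoff pattern over a sorted list, simplified to a singly linked list with invented names:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    struct node {                       /* sorted ascending by key */
            uint64_t key;
            struct node *next;
    };

    /* Return the lowest-key node that passes eligible(), or NULL once every
     * remaining key is already >= best_key -- the same early cutoff the new
     * earliest_deadline_task() loop applies per runqueue. */
    static struct node *best_eligible(struct node *head, uint64_t best_key,
                                      bool (*eligible)(const struct node *))
    {
            for (struct node *n = head; n; n = n->next) {
                    if (n->key >= best_key)
                            break;          /* nothing further can win */
                    if (eligible(n))
                            return n;       /* lowest eligible key wins */
            }
            return NULL;
    }
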
@@ -3640,12 +3646,10 @@ static inline void schedule_debug(struct task_struct *prev)
*/
static inline void set_rq_task(struct rq *rq, struct task_struct *p)
{
-#ifdef CONFIG_HIGH_RES_TIMERS
if (p == rq->idle || p->policy == SCHED_FIFO)
hrexpiry_clear(rq);
else
hrexpiry_start(rq, US_TO_NS(p->time_slice));
-#endif /* CONFIG_HIGH_RES_TIMERS */
if (rq->clock - rq->last_tick > HALF_JIFFY_NS)
rq->dither = 0;
else