From 29f5b648fa0b31ad614c78468b9279e5fa96397a Mon Sep 17 00:00:00 2001
From: André Fabian Silva Delgado
Date: Fri, 11 Nov 2016 16:22:27 -0300
Subject: Linux-libre 4.8.7-gnu

---
 kernel/sched/MuQSS.c | 31 ++++++++++++++--------
 kernel/sched/MuQSS.h | 15 +++++++++++
 kernel/sched/idle.c  | 10 +++++--
 kernel/sched/sched.h |  5 ++++
 kernel/time/timer.c  | 74 +++++++++++++++++++++++++++++++---------------------
 5 files changed, 92 insertions(+), 43 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c
index cf7a95286..7c4ddc734 100644
--- a/kernel/sched/MuQSS.c
+++ b/kernel/sched/MuQSS.c
@@ -760,6 +760,13 @@ static inline bool task_queued(struct task_struct *p)
 static void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
 static inline void resched_if_idle(struct rq *rq);
 
+/* Dodgy workaround till we figure out where the softirqs are going */
+static inline void do_pending_softirq(struct rq *rq, struct task_struct *next)
+{
+        if (unlikely(next == rq->idle && local_softirq_pending() && !in_interrupt()))
+                do_softirq_own_stack();
+}
+
 static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 {
 #ifdef CONFIG_SMP
@@ -814,7 +821,11 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
                 raw_spin_unlock(&prev->pi_lock);
         }
 #endif
-        raw_spin_unlock_irq(&rq->lock);
+        rq_unlock(rq);
+
+        do_pending_softirq(rq, current);
+
+        local_irq_enable();
 }
 
 static inline bool deadline_before(u64 deadline, u64 time)
@@ -2556,7 +2567,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
  * past. prev == current is still correct but we need to recalculate this_rq
  * because prev may have moved to another CPU.
  */
-static struct rq *finish_task_switch(struct task_struct *prev)
+static void finish_task_switch(struct task_struct *prev)
         __releases(rq->lock)
 {
         struct rq *rq = this_rq();
@@ -2609,7 +2620,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
                 kprobe_flush_task(prev);
                 put_task_struct(prev);
         }
-        return rq;
 }
 
 /**
@@ -2617,10 +2627,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
  * @prev: the thread we just switched away from.
  */
 asmlinkage __visible void schedule_tail(struct task_struct *prev)
-        __releases(rq->lock)
 {
-        struct rq *rq;
-
         /*
          * New tasks start with FORK_PREEMPT_COUNT, see there and
          * finish_task_switch() for details.
@@ -2630,7 +2637,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
          * PREEMPT_COUNT kernels).
          */
 
-        rq = finish_task_switch(prev);
+        finish_task_switch(prev);
         preempt_enable();
 
         if (current->set_child_tid)
@@ -2640,7 +2647,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
 /*
  * context_switch - switch to the new MM and the new thread's register state.
  */
-static __always_inline struct rq *
+static __always_inline void
 context_switch(struct rq *rq, struct task_struct *prev,
                struct task_struct *next)
 {
@@ -2680,7 +2687,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
         switch_to(prev, next, prev);
         barrier();
 
-        return finish_task_switch(prev);
+        finish_task_switch(prev);
 }
 
 /*
@@ -3854,10 +3861,12 @@ static void __sched notrace __schedule(bool preempt)
                 ++*switch_count;
 
                 trace_sched_switch(preempt, prev, next);
-                rq = context_switch(rq, prev, next); /* unlocks the rq */
+                context_switch(rq, prev, next); /* unlocks the rq */
         } else {
                 check_siblings(rq);
-                rq_unlock_irq(rq);
+                rq_unlock(rq);
+                do_pending_softirq(rq, next);
+                local_irq_enable();
         }
 }
 
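The MuQSS.c hunks above split the old raw_spin_unlock_irq() into three steps: drop the runqueue lock, run any pending softirqs if the CPU is being handed to the idle task, and only then re-enable interrupts. The userspace sketch below illustrates that ordering only; it is not kernel code, and every name in it (fake_rq, pending_softirqs, finish_switch) is invented for illustration.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct fake_rq {
        pthread_mutex_t lock;
        int idle_task;                          /* id of this rq's idle task */
};

static atomic_uint pending_softirqs;            /* bitmask of deferred work */

static void run_pending_softirqs(void)
{
        unsigned int mask = atomic_exchange(&pending_softirqs, 0);

        while (mask) {
                int nr = __builtin_ctz(mask);

                mask &= mask - 1;               /* clear lowest set bit */
                printf("servicing softirq %d\n", nr);
        }
}

/* Mirrors the new finish_lock_switch() tail: unlock, drain, re-enable. */
static void finish_switch(struct fake_rq *rq, int next_task)
{
        pthread_mutex_unlock(&rq->lock);        /* rq_unlock(rq) */

        if (next_task == rq->idle_task)         /* do_pending_softirq() */
                run_pending_softirqs();

        /* local_irq_enable() would go here in the real code */
}

int main(void)
{
        struct fake_rq rq = { .lock = PTHREAD_MUTEX_INITIALIZER, .idle_task = 0 };

        atomic_fetch_or(&pending_softirqs, 1u << 3);    /* pretend one softirq was raised */
        pthread_mutex_lock(&rq.lock);
        finish_switch(&rq, rq.idle_task);               /* switching to "idle" */
        return 0;
}

Draining while the lock is already released keeps the deferred handlers from running under the runqueue lock, which is the ordering the kernel hunks establish.
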
diff --git a/kernel/sched/MuQSS.h b/kernel/sched/MuQSS.h
index f9510d739..3565a7d8a 100644
--- a/kernel/sched/MuQSS.h
+++ b/kernel/sched/MuQSS.h
@@ -1,5 +1,6 @@
 #include <linux/sched.h>
 #include <linux/cpuidle.h>
+#include <linux/interrupt.h>
 #include <linux/skip_list.h>
 #include <linux/stop_machine.h>
 #include "cpuacct.h"
@@ -325,4 +326,18 @@ static inline void cpufreq_trigger(u64 time, unsigned long util)
 #define arch_scale_freq_invariant()     (false)
 #endif
 
+/*
+ * This should only be called when current == rq->idle. Dodgy workaround for
+ * when softirqs are pending and we are in the idle loop. Setting current to
+ * resched will kick us out of the idle loop and the softirqs will be serviced
+ * on our next pass through schedule().
+ */
+static inline bool softirq_pending(int cpu)
+{
+        if (likely(!local_softirq_pending()))
+                return false;
+        set_tsk_need_resched(current);
+        return true;
+}
+
 #endif /* MUQSS_SCHED_H */
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 060b76d85..51264e6b1 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -208,6 +208,8 @@ static void cpu_idle_loop(void)
         int cpu = smp_processor_id();
 
         while (1) {
+                bool pending = false;
+
                 /*
                  * If the arch has a polling bit, we maintain an invariant:
                  *
@@ -219,7 +221,10 @@ static void cpu_idle_loop(void)
 
                 __current_set_polling();
                 quiet_vmstat();
-                tick_nohz_idle_enter();
+                if (unlikely(softirq_pending(cpu)))
+                        pending = true;
+                else
+                        tick_nohz_idle_enter();
 
                 while (!need_resched()) {
                         check_pgt_cache();
@@ -259,7 +264,8 @@ static void cpu_idle_loop(void)
                  * not have had an IPI to fold the state for us.
                  */
                 preempt_set_need_resched();
-                tick_nohz_idle_exit();
+                if (!pending)
+                        tick_nohz_idle_exit();
                 __current_clr_polling();
 
                 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c64fc5114..cdefab6df 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1813,3 +1813,8 @@ static inline void cpufreq_trigger_update(u64 time) {}
 #else /* arch_scale_freq_capacity */
 #define arch_scale_freq_invariant()     (false)
 #endif
+
+static inline bool softirq_pending(int cpu)
+{
+        return false;
+}
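The idle.c hunks keep tick_nohz_idle_enter()/tick_nohz_idle_exit() strictly paired: if softirq_pending() makes the loop skip the enter, the per-iteration pending flag also makes it skip the exit. A minimal sketch of that pairing follows; it assumes nothing about the real tick code, and fake_softirq_pending(), nohz_enter() and nohz_exit() are invented names.

#include <stdbool.h>
#include <stdio.h>

static bool fake_softirq_pending(void)
{
        return true;            /* pretend deferred work is pending this time */
}

static void nohz_enter(void) { puts("tick stopped"); }
static void nohz_exit(void)  { puts("tick restarted"); }

static void idle_iteration(void)
{
        bool pending = false;

        if (fake_softirq_pending())
                pending = true;         /* bail out early, leave the tick running */
        else
                nohz_enter();

        /* ... idle polling / sleeping would happen here ... */

        if (!pending)
                nohz_exit();            /* only undo what was actually done */
}

int main(void)
{
        idle_iteration();
        return 0;
}

The sched.h stub that always returns false keeps the mainline scheduler build unchanged; only MuQSS gets the early-bailout behaviour.
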
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 32bf6f75a..96db64bde 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -878,7 +878,7 @@ static inline struct timer_base *get_timer_base(u32 tflags)
 
 #ifdef CONFIG_NO_HZ_COMMON
 static inline struct timer_base *
-__get_target_base(struct timer_base *base, unsigned tflags)
+get_target_base(struct timer_base *base, unsigned tflags)
 {
 #ifdef CONFIG_SMP
         if ((tflags & TIMER_PINNED) || !base->migration_enabled)
@@ -891,25 +891,27 @@ __get_target_base(struct timer_base *base, unsigned tflags)
 
 static inline void forward_timer_base(struct timer_base *base)
 {
+        unsigned long jnow = READ_ONCE(jiffies);
+
         /*
          * We only forward the base when it's idle and we have a delta between
          * base clock and jiffies.
          */
-        if (!base->is_idle || (long) (jiffies - base->clk) < 2)
+        if (!base->is_idle || (long) (jnow - base->clk) < 2)
                 return;
 
         /*
          * If the next expiry value is > jiffies, then we fast forward to
          * jiffies otherwise we forward to the next expiry value.
          */
-        if (time_after(base->next_expiry, jiffies))
-                base->clk = jiffies;
+        if (time_after(base->next_expiry, jnow))
+                base->clk = jnow;
         else
                 base->clk = base->next_expiry;
 }
 #else
 static inline struct timer_base *
-__get_target_base(struct timer_base *base, unsigned tflags)
+get_target_base(struct timer_base *base, unsigned tflags)
 {
         return get_timer_this_cpu_base(tflags);
 }
@@ -917,14 +919,6 @@ __get_target_base(struct timer_base *base, unsigned tflags)
 static inline void forward_timer_base(struct timer_base *base) { }
 #endif
 
-static inline struct timer_base *
-get_target_base(struct timer_base *base, unsigned tflags)
-{
-        struct timer_base *target = __get_target_base(base, tflags);
-
-        forward_timer_base(target);
-        return target;
-}
 
 /*
  * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
@@ -943,7 +937,14 @@ static struct timer_base *lock_timer_base(struct timer_list *timer,
 {
         for (;;) {
                 struct timer_base *base;
-                u32 tf = timer->flags;
+                u32 tf;
+
+                /*
+                 * We need to use READ_ONCE() here, otherwise the compiler
+                 * might re-read @tf between the check for TIMER_MIGRATING
+                 * and spin_lock().
+                 */
+                tf = READ_ONCE(timer->flags);
 
                 if (!(tf & TIMER_MIGRATING)) {
                         base = get_timer_base(tf);
@@ -964,6 +965,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
         unsigned long clk = 0, flags;
         int ret = 0;
 
+        BUG_ON(!timer->function);
+
         /*
          * This is a common optimization triggered by the networking code - if
          * the timer is re-modified to have the same timeout or ends up in the
@@ -972,13 +975,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
         if (timer_pending(timer)) {
                 if (timer->expires == expires)
                         return 1;
+
                 /*
-                 * Take the current timer_jiffies of base, but without holding
-                 * the lock!
+                 * We lock timer base and calculate the bucket index right
+                 * here. If the timer ends up in the same bucket, then we
+                 * just update the expiry time and avoid the whole
+                 * dequeue/enqueue dance.
                  */
-                base = get_timer_base(timer->flags);
-                clk = base->clk;
+                base = lock_timer_base(timer, &flags);
+                clk = base->clk;
 
                 idx = calc_wheel_index(expires, clk);
 
                 /*
@@ -988,14 +994,14 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
                  */
                 if (idx == timer_get_idx(timer)) {
                         timer->expires = expires;
-                        return 1;
+                        ret = 1;
+                        goto out_unlock;
                 }
+        } else {
+                base = lock_timer_base(timer, &flags);
         }
 
         timer_stats_timer_set_start_info(timer);
-        BUG_ON(!timer->function);
-
-        base = lock_timer_base(timer, &flags);
 
         ret = detach_if_pending(timer, base, false);
         if (!ret && pending_only)
@@ -1025,12 +1031,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
                 }
         }
 
+        /* Try to forward a stale timer base clock */
+        forward_timer_base(base);
+
         timer->expires = expires;
         /*
          * If 'idx' was calculated above and the base time did not advance
-         * between calculating 'idx' and taking the lock, only enqueue_timer()
-         * and trigger_dyntick_cpu() is required. Otherwise we need to
-         * (re)calculate the wheel index via internal_add_timer().
+         * between calculating 'idx' and possibly switching the base, only
+         * enqueue_timer() and trigger_dyntick_cpu() is required. Otherwise
+         * we need to (re)calculate the wheel index via
+         * internal_add_timer().
          */
         if (idx != UINT_MAX && clk == base->clk) {
                 enqueue_timer(base, timer, idx);
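The __mod_timer() hunks above move lock_timer_base() in front of the bucket calculation, so the "same bucket" shortcut is evaluated under the base lock instead of against an unlocked snapshot of base->clk. The sketch below shows the shape of that fast path with a deliberately simplified bucket function; calc_bucket(), toy_base, toy_timer and toy_mod_timer() are made up and are not the kernel's calc_wheel_index() or struct timer_base.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define BUCKET_SHIFT 6          /* pretend one bucket covers 64 ticks */

struct toy_base {
        pthread_mutex_t lock;
        unsigned long clk;
};

struct toy_timer {
        unsigned long expires;
        unsigned int idx;       /* bucket the timer currently sits in */
        bool pending;
};

static unsigned int calc_bucket(unsigned long expires, unsigned long clk)
{
        return (unsigned int)((expires - clk) >> BUCKET_SHIFT);
}

/* Re-arm @timer: if the new expiry lands in the same bucket, skip requeueing. */
static void toy_mod_timer(struct toy_base *base, struct toy_timer *timer,
                          unsigned long expires)
{
        pthread_mutex_lock(&base->lock);

        if (timer->pending && calc_bucket(expires, base->clk) == timer->idx) {
                timer->expires = expires;       /* cheap path: no dequeue/enqueue */
                pthread_mutex_unlock(&base->lock);
                return;
        }

        /* slow path: recompute the bucket and requeue */
        timer->idx = calc_bucket(expires, base->clk);
        timer->expires = expires;
        timer->pending = true;

        pthread_mutex_unlock(&base->lock);
}

int main(void)
{
        struct toy_base base = { .lock = PTHREAD_MUTEX_INITIALIZER, .clk = 1000 };
        struct toy_timer t = { .expires = 1100, .idx = 1, .pending = true };

        toy_mod_timer(&base, &t, 1120);         /* same 64-tick bucket: fast path */
        printf("expires=%lu idx=%u\n", t.expires, t.idx);
        return 0;
}

With the lock held, the index comparison and the expiry update cannot race with the base clock being forwarded, which is what made the earlier lockless peek at base->clk fragile.
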
@@ -1510,12 +1520,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
         is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
         base->next_expiry = nextevt;
         /*
-         * We have a fresh next event. Check whether we can forward the base:
+         * We have a fresh next event. Check whether we can forward the
+         * base. We can only do that when @basej is past base->clk
+         * otherwise we might rewind base->clk.
          */
-        if (time_after(nextevt, jiffies))
-                base->clk = jiffies;
-        else if (time_after(nextevt, base->clk))
-                base->clk = nextevt;
+        if (time_after(basej, base->clk)) {
+                if (time_after(nextevt, basej))
+                        base->clk = basej;
+                else if (time_after(nextevt, base->clk))
+                        base->clk = nextevt;
+        }
 
         if (time_before_eq(nextevt, basej)) {
                 expires = basem;
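The last hunk only ever moves base->clk forward: forwarding is skipped entirely unless @basej is already past base->clk, so a stale caller can no longer rewind the wheel clock. Below is a small standalone sketch of that guard using a wraparound-safe comparison in the style of time_after(); tick_after, toy_base and forward_clk are invented names, not kernel interfaces.

#include <stdio.h>

/* wraparound-safe "a is after b" for unsigned tick counters */
#define tick_after(a, b)        ((long)((b) - (a)) < 0)

struct toy_base {
        unsigned long clk;      /* where the wheel thinks "now" is */
};

static void forward_clk(struct toy_base *base, unsigned long nextevt,
                        unsigned long now)
{
        /* Only forward when 'now' is past base->clk, never rewind it. */
        if (!tick_after(now, base->clk))
                return;

        if (tick_after(nextevt, now))
                base->clk = now;        /* no event before 'now': jump to 'now' */
        else if (tick_after(nextevt, base->clk))
                base->clk = nextevt;    /* the earliest event bounds the forward */
}

int main(void)
{
        struct toy_base base = { .clk = 500 };

        forward_clk(&base, 400, 300);   /* now < clk: guard trips, clk stays 500 */
        forward_clk(&base, 800, 600);   /* forwarded to 'now' = 600 */
        printf("clk=%lu\n", base.clk);
        return 0;
}

The signed-subtraction comparison keeps the guard correct even when the tick counter wraps, matching the semantics of the kernel's time_after() used in the hunk.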