From 29f5b648fa0b31ad614c78468b9279e5fa96397a Mon Sep 17 00:00:00 2001
From: André Fabian Silva Delgado
Date: Fri, 11 Nov 2016 16:22:27 -0300
Subject: Linux-libre 4.8.7-gnu

---
 kernel/sched/MuQSS.c | 31 ++++++++++++++--------
 kernel/sched/MuQSS.h | 15 +++++++++++
 kernel/sched/idle.c  | 10 +++++--
 kernel/sched/sched.h |  5 ++++
 kernel/time/timer.c  | 74 +++++++++++++++++++++++++++++++---------------------
 5 files changed, 92 insertions(+), 43 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c
index cf7a95286..7c4ddc734 100644
--- a/kernel/sched/MuQSS.c
+++ b/kernel/sched/MuQSS.c
@@ -760,6 +760,13 @@ static inline bool task_queued(struct task_struct *p)
 static void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
 static inline void resched_if_idle(struct rq *rq);
 
+/* Dodgy workaround till we figure out where the softirqs are going */
+static inline void do_pending_softirq(struct rq *rq, struct task_struct *next)
+{
+        if (unlikely(next == rq->idle && local_softirq_pending() && !in_interrupt()))
+                do_softirq_own_stack();
+}
+
 static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 {
 #ifdef CONFIG_SMP
@@ -814,7 +821,11 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
                 raw_spin_unlock(&prev->pi_lock);
         }
 #endif
-        raw_spin_unlock_irq(&rq->lock);
+        rq_unlock(rq);
+
+        do_pending_softirq(rq, current);
+
+        local_irq_enable();
 }
 
 static inline bool deadline_before(u64 deadline, u64 time)
@@ -2556,7 +2567,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
  * past. prev == current is still correct but we need to recalculate this_rq
  * because prev may have moved to another CPU.
  */
-static struct rq *finish_task_switch(struct task_struct *prev)
+static void finish_task_switch(struct task_struct *prev)
         __releases(rq->lock)
 {
         struct rq *rq = this_rq();
@@ -2609,7 +2620,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
                 kprobe_flush_task(prev);
                 put_task_struct(prev);
         }
-        return rq;
 }
 
 /**
@@ -2617,10 +2627,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
  * @prev: the thread we just switched away from.
  */
 asmlinkage __visible void schedule_tail(struct task_struct *prev)
-        __releases(rq->lock)
 {
-        struct rq *rq;
-
         /*
          * New tasks start with FORK_PREEMPT_COUNT, see there and
          * finish_task_switch() for details.
@@ -2630,7 +2637,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
          * PREEMPT_COUNT kernels).
          */
 
-        rq = finish_task_switch(prev);
+        finish_task_switch(prev);
         preempt_enable();
 
         if (current->set_child_tid)
@@ -2640,7 +2647,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
 /*
  * context_switch - switch to the new MM and the new thread's register state.
  */
-static __always_inline struct rq *
+static __always_inline void
 context_switch(struct rq *rq, struct task_struct *prev,
                struct task_struct *next)
 {
@@ -2680,7 +2687,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
         switch_to(prev, next, prev);
         barrier();
 
-        return finish_task_switch(prev);
+        finish_task_switch(prev);
 }
 
 /*
@@ -3854,10 +3861,12 @@ static void __sched notrace __schedule(bool preempt)
                 ++*switch_count;
 
                 trace_sched_switch(preempt, prev, next);
-                rq = context_switch(rq, prev, next); /* unlocks the rq */
+                context_switch(rq, prev, next); /* unlocks the rq */
         } else {
                 check_siblings(rq);
-                rq_unlock_irq(rq);
+                rq_unlock(rq);
+                do_pending_softirq(rq, next);
+                local_irq_enable();
         }
 }
 
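The MuQSS.c hunks above split the old raw_spin_unlock_irq() into three steps: drop the runqueue lock, run any pending softirqs if the CPU is being handed to the idle task, and only then re-enable interrupts. The userspace sketch below illustrates that ordering only; it is not kernel code, and every name in it (fake_rq, pending_softirqs, finish_switch) is invented for illustration.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct fake_rq {
        pthread_mutex_t lock;
        int idle_task;                          /* id of this rq's idle task */
};

static atomic_uint pending_softirqs;            /* bitmask of deferred work */

static void run_pending_softirqs(void)
{
        unsigned int mask = atomic_exchange(&pending_softirqs, 0);

        while (mask) {
                int nr = __builtin_ctz(mask);

                mask &= mask - 1;               /* clear lowest set bit */
                printf("servicing softirq %d\n", nr);
        }
}

/* Mirrors the new finish_lock_switch() tail: unlock, drain, re-enable. */
static void finish_switch(struct fake_rq *rq, int next_task)
{
        pthread_mutex_unlock(&rq->lock);        /* rq_unlock(rq) */

        if (next_task == rq->idle_task)         /* do_pending_softirq() */
                run_pending_softirqs();

        /* local_irq_enable() would go here in the real code */
}

int main(void)
{
        struct fake_rq rq = { .lock = PTHREAD_MUTEX_INITIALIZER, .idle_task = 0 };

        atomic_fetch_or(&pending_softirqs, 1u << 3);    /* pretend one softirq was raised */
        pthread_mutex_lock(&rq.lock);
        finish_switch(&rq, rq.idle_task);               /* switching to "idle" */
        return 0;
}

Draining while the lock is already released keeps the deferred handlers from running under the runqueue lock, which is the ordering the kernel hunks establish.
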
diff --git a/kernel/sched/MuQSS.h b/kernel/sched/MuQSS.h
index f9510d739..3565a7d8a 100644
--- a/kernel/sched/MuQSS.h
+++ b/kernel/sched/MuQSS.h
@@ -1,5 +1,6 @@
 #include <linux/sched.h>
 #include <linux/cpuidle.h>
+#include <linux/interrupt.h>
 #include <linux/skip_list.h>
 #include <linux/stop_machine.h>
 #include "cpuacct.h"
@@ -325,4 +326,18 @@ static inline void cpufreq_trigger(u64 time, unsigned long util)
 #define arch_scale_freq_invariant()     (false)
 #endif
 
+/*
+ * This should only be called when current == rq->idle. Dodgy workaround for
+ * when softirqs are pending and we are in the idle loop. Setting current to
+ * resched will kick us out of the idle loop and the softirqs will be serviced
+ * on our next pass through schedule().
+ */
+static inline bool softirq_pending(int cpu)
+{
+        if (likely(!local_softirq_pending()))
+                return false;
+        set_tsk_need_resched(current);
+        return true;
+}
+
 #endif /* MUQSS_SCHED_H */
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 060b76d85..51264e6b1 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -208,6 +208,8 @@ static void cpu_idle_loop(void)
         int cpu = smp_processor_id();
 
         while (1) {
+                bool pending = false;
+
                 /*
                  * If the arch has a polling bit, we maintain an invariant:
                  *
@@ -219,7 +221,10 @@ static void cpu_idle_loop(void)
 
                 __current_set_polling();
                 quiet_vmstat();
-                tick_nohz_idle_enter();
+                if (unlikely(softirq_pending(cpu)))
+                        pending = true;
+                else
+                        tick_nohz_idle_enter();
 
                 while (!need_resched()) {
                         check_pgt_cache();
@@ -259,7 +264,8 @@ static void cpu_idle_loop(void)
                  * not have had an IPI to fold the state for us.
                  */
                 preempt_set_need_resched();
-                tick_nohz_idle_exit();
+                if (!pending)
+                        tick_nohz_idle_exit();
                 __current_clr_polling();
 
                 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c64fc5114..cdefab6df 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1813,3 +1813,8 @@ static inline void cpufreq_trigger_update(u64 time) {}
 #else /* arch_scale_freq_capacity */
 #define arch_scale_freq_invariant()     (false)
 #endif
+
+static inline bool softirq_pending(int cpu)
+{
+        return false;
+}
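The idle.c hunks keep tick_nohz_idle_enter()/tick_nohz_idle_exit() strictly paired: if softirq_pending() makes the loop skip the enter, the per-iteration pending flag also makes it skip the exit. A minimal sketch of that pairing follows; it assumes nothing about the real tick code, and fake_softirq_pending(), nohz_enter() and nohz_exit() are invented names.

#include <stdbool.h>
#include <stdio.h>

static bool fake_softirq_pending(void)
{
        return true;            /* pretend deferred work is pending this time */
}

static void nohz_enter(void) { puts("tick stopped"); }
static void nohz_exit(void)  { puts("tick restarted"); }

static void idle_iteration(void)
{
        bool pending = false;

        if (fake_softirq_pending())
                pending = true;         /* bail out early, leave the tick running */
        else
                nohz_enter();

        /* ... idle polling / sleeping would happen here ... */

        if (!pending)
                nohz_exit();            /* only undo what was actually done */
}

int main(void)
{
        idle_iteration();
        return 0;
}

The sched.h stub that always returns false keeps the mainline scheduler build unchanged; only MuQSS gets the early-bailout behaviour.
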
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 32bf6f75a..96db64bde 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -878,7 +878,7 @@ static inline struct timer_base *get_timer_base(u32 tflags)
 
 #ifdef CONFIG_NO_HZ_COMMON
 static inline struct timer_base *
-__get_target_base(struct timer_base *base, unsigned tflags)
+get_target_base(struct timer_base *base, unsigned tflags)
 {
 #ifdef CONFIG_SMP
         if ((tflags & TIMER_PINNED) || !base->migration_enabled)
@@ -891,25 +891,27 @@ __get_target_base(struct timer_base *base, unsigned tflags)
 
 static inline void forward_timer_base(struct timer_base *base)
 {
+        unsigned long jnow = READ_ONCE(jiffies);
+
         /*
          * We only forward the base when it's idle and we have a delta between
          * base clock and jiffies.
          */
-        if (!base->is_idle || (long) (jiffies - base->clk) < 2)
+        if (!base->is_idle || (long) (jnow - base->clk) < 2)
                 return;
 
         /*
          * If the next expiry value is > jiffies, then we fast forward to
          * jiffies otherwise we forward to the next expiry value.
          */
-        if (time_after(base->next_expiry, jiffies))
-                base->clk = jiffies;
+        if (time_after(base->next_expiry, jnow))
+                base->clk = jnow;
         else
                 base->clk = base->next_expiry;
 }
 #else
 static inline struct timer_base *
-__get_target_base(struct timer_base *base, unsigned tflags)
+get_target_base(struct timer_base *base, unsigned tflags)
 {
         return get_timer_this_cpu_base(tflags);
 }
@@ -917,14 +919,6 @@ __get_target_base(struct timer_base *base, unsigned tflags)
 static inline void forward_timer_base(struct timer_base *base) { }
 #endif
 
-static inline struct timer_base *
-get_target_base(struct timer_base *base, unsigned tflags)
-{
-        struct timer_base *target = __get_target_base(base, tflags);
-
-        forward_timer_base(target);
-        return target;
-}
 
 /*
  * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
@@ -943,7 +937,14 @@ static struct timer_base *lock_timer_base(struct timer_list *timer,
 {
         for (;;) {
                 struct timer_base *base;
-                u32 tf = timer->flags;
+                u32 tf;
+
+                /*
+                 * We need to use READ_ONCE() here, otherwise the compiler
+                 * might re-read @tf between the check for TIMER_MIGRATING
+                 * and spin_lock().
+                 */
+                tf = READ_ONCE(timer->flags);
 
                 if (!(tf & TIMER_MIGRATING)) {
                         base = get_timer_base(tf);
@@ -964,6 +965,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
         unsigned long clk = 0, flags;
         int ret = 0;
 
+        BUG_ON(!timer->function);
+
         /*
          * This is a common optimization triggered by the networking code - if
          * the timer is re-modified to have the same timeout or ends up in the
@@ -972,13 +975,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
         if (timer_pending(timer)) {
                 if (timer->expires == expires)
                         return 1;
+
                 /*
-                 * Take the current timer_jiffies of base, but without holding
-                 * the lock!
+                 * We lock timer base and calculate the bucket index right
+                 * here. If the timer ends up in the same bucket, then we
+                 * just update the expiry time and avoid the whole
+                 * dequeue/enqueue dance.
                  */
-                base = get_timer_base(timer->flags);
-                clk = base->clk;
+                base = lock_timer_base(timer, &flags);
+                clk = base->clk;
 
                 idx = calc_wheel_index(expires, clk);
 
                 /*
@@ -988,14 +994,14 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
                  */
                 if (idx == timer_get_idx(timer)) {
                         timer->expires = expires;
-                        return 1;
+                        ret = 1;
+                        goto out_unlock;
                 }
+        } else {
+                base = lock_timer_base(timer, &flags);
         }
 
         timer_stats_timer_set_start_info(timer);
-        BUG_ON(!timer->function);
-
-        base = lock_timer_base(timer, &flags);
 
         ret = detach_if_pending(timer, base, false);
         if (!ret && pending_only)
@@ -1025,12 +1031,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
                 }
         }
 
+        /* Try to forward a stale timer base clock */
+        forward_timer_base(base);
+
         timer->expires = expires;
         /*
          * If 'idx' was calculated above and the base time did not advance
-         * between calculating 'idx' and taking the lock, only enqueue_timer()
-         * and trigger_dyntick_cpu() is required. Otherwise we need to
-         * (re)calculate the wheel index via internal_add_timer().
+         * between calculating 'idx' and possibly switching the base, only
+         * enqueue_timer() and trigger_dyntick_cpu() is required. Otherwise
+         * we need to (re)calculate the wheel index via
+         * internal_add_timer().
          */
         if (idx != UINT_MAX && clk == base->clk) {
                 enqueue_timer(base, timer, idx);
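The __mod_timer() hunks above move lock_timer_base() in front of the bucket calculation, so the "same bucket" shortcut is evaluated under the base lock instead of against an unlocked snapshot of base->clk. The sketch below shows the shape of that fast path with a deliberately simplified bucket function; calc_bucket(), toy_base, toy_timer and toy_mod_timer() are made up and are not the kernel's calc_wheel_index() or struct timer_base.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define BUCKET_SHIFT 6          /* pretend one bucket covers 64 ticks */

struct toy_base {
        pthread_mutex_t lock;
        unsigned long clk;
};

struct toy_timer {
        unsigned long expires;
        unsigned int idx;       /* bucket the timer currently sits in */
        bool pending;
};

static unsigned int calc_bucket(unsigned long expires, unsigned long clk)
{
        return (unsigned int)((expires - clk) >> BUCKET_SHIFT);
}

/* Re-arm @timer: if the new expiry lands in the same bucket, skip requeueing. */
static void toy_mod_timer(struct toy_base *base, struct toy_timer *timer,
                          unsigned long expires)
{
        pthread_mutex_lock(&base->lock);

        if (timer->pending && calc_bucket(expires, base->clk) == timer->idx) {
                timer->expires = expires;       /* cheap path: no dequeue/enqueue */
                pthread_mutex_unlock(&base->lock);
                return;
        }

        /* slow path: recompute the bucket and requeue */
        timer->idx = calc_bucket(expires, base->clk);
        timer->expires = expires;
        timer->pending = true;

        pthread_mutex_unlock(&base->lock);
}

int main(void)
{
        struct toy_base base = { .lock = PTHREAD_MUTEX_INITIALIZER, .clk = 1000 };
        struct toy_timer t = { .expires = 1100, .idx = 1, .pending = true };

        toy_mod_timer(&base, &t, 1120);         /* same 64-tick bucket: fast path */
        printf("expires=%lu idx=%u\n", t.expires, t.idx);
        return 0;
}

With the lock held, the index comparison and the expiry update cannot race with the base clock being forwarded, which is what made the earlier lockless peek at base->clk fragile.
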
@@ -1510,12 +1520,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
         is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
         base->next_expiry = nextevt;
         /*
-         * We have a fresh next event. Check whether we can forward the base:
+         * We have a fresh next event. Check whether we can forward the
+         * base. We can only do that when @basej is past base->clk
+         * otherwise we might rewind base->clk.
          */
-        if (time_after(nextevt, jiffies))
-                base->clk = jiffies;
-        else if (time_after(nextevt, base->clk))
-                base->clk = nextevt;
+        if (time_after(basej, base->clk)) {
+                if (time_after(nextevt, basej))
+                        base->clk = basej;
+                else if (time_after(nextevt, base->clk))
+                        base->clk = nextevt;
+        }
 
         if (time_before_eq(nextevt, basej)) {
                 expires = basem;
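The last hunk only ever moves base->clk forward: forwarding is skipped entirely unless @basej is already past base->clk, so a stale caller can no longer rewind the wheel clock. Below is a small standalone sketch of that guard using a wraparound-safe comparison in the style of time_after(); tick_after, toy_base and forward_clk are invented names, not kernel interfaces.

#include <stdio.h>

/* wraparound-safe "a is after b" for unsigned tick counters */
#define tick_after(a, b)        ((long)((b) - (a)) < 0)

struct toy_base {
        unsigned long clk;      /* where the wheel thinks "now" is */
};

static void forward_clk(struct toy_base *base, unsigned long nextevt,
                        unsigned long now)
{
        /* Only forward when 'now' is past base->clk, never rewind it. */
        if (!tick_after(now, base->clk))
                return;

        if (tick_after(nextevt, now))
                base->clk = now;        /* no event before 'now': jump to 'now' */
        else if (tick_after(nextevt, base->clk))
                base->clk = nextevt;    /* the earliest event bounds the forward */
}

int main(void)
{
        struct toy_base base = { .clk = 500 };

        forward_clk(&base, 400, 300);   /* now < clk: guard trips, clk stays 500 */
        forward_clk(&base, 800, 600);   /* forwarded to 'now' = 600 */
        printf("clk=%lu\n", base.clk);
        return 0;
}

The signed-subtraction comparison keeps the guard correct even when the tick counter wraps, matching the semantics of the kernel's time_after() used in the hunk.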