summaryrefslogtreecommitdiff
path: root/kernel/time
diff options
context:
space:
mode:
authorAndré Fabian Silva Delgado <emulatorman@parabola.nu>2016-06-10 05:30:17 -0300
committerAndré Fabian Silva Delgado <emulatorman@parabola.nu>2016-06-10 05:30:17 -0300
commitd635711daa98be86d4c7fd01499c34f566b54ccb (patch)
treeaa5cc3760a27c3d57146498cb82fa549547de06c /kernel/time
parentc91265cd0efb83778f015b4d4b1129bd2cfd075e (diff)
Linux-libre 4.6.2-gnu
Diffstat (limited to 'kernel/time')
-rw-r--r--kernel/time/clocksource.c52
-rw-r--r--kernel/time/hrtimer.c18
-rw-r--r--kernel/time/jiffies.c2
-rw-r--r--kernel/time/posix-cpu-timers.c52
-rw-r--r--kernel/time/tick-sched.c200
-rw-r--r--kernel/time/tick-sched.h1
-rw-r--r--kernel/time/time.c9
-rw-r--r--kernel/time/timekeeping.c288
-rw-r--r--kernel/time/timer.c15
9 files changed, 489 insertions, 148 deletions
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 664de5392..56ece145a 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -323,13 +323,42 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
/* cs is a watchdog. */
if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
+ }
+ spin_unlock_irqrestore(&watchdog_lock, flags);
+}
+
+static void clocksource_select_watchdog(bool fallback)
+{
+ struct clocksource *cs, *old_wd;
+ unsigned long flags;
+
+ spin_lock_irqsave(&watchdog_lock, flags);
+ /* save current watchdog */
+ old_wd = watchdog;
+ if (fallback)
+ watchdog = NULL;
+
+ list_for_each_entry(cs, &clocksource_list, list) {
+ /* cs is a clocksource to be watched. */
+ if (cs->flags & CLOCK_SOURCE_MUST_VERIFY)
+ continue;
+
+ /* Skip current if we were requested for a fallback. */
+ if (fallback && cs == old_wd)
+ continue;
+
/* Pick the best watchdog. */
- if (!watchdog || cs->rating > watchdog->rating) {
+ if (!watchdog || cs->rating > watchdog->rating)
watchdog = cs;
- /* Reset watchdog cycles */
- clocksource_reset_watchdog();
- }
}
+ /* If we failed to find a fallback restore the old one. */
+ if (!watchdog)
+ watchdog = old_wd;
+
+ /* If we changed the watchdog we need to reset cycles. */
+ if (watchdog != old_wd)
+ clocksource_reset_watchdog();
+
/* Check if the watchdog timer needs to be started. */
clocksource_start_watchdog();
spin_unlock_irqrestore(&watchdog_lock, flags);
@@ -404,6 +433,7 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
}
+static void clocksource_select_watchdog(bool fallback) { }
static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
static inline int __clocksource_watchdog_kthread(void) { return 0; }
@@ -736,6 +766,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
clocksource_enqueue(cs);
clocksource_enqueue_watchdog(cs);
clocksource_select();
+ clocksource_select_watchdog(false);
mutex_unlock(&clocksource_mutex);
return 0;
}
@@ -758,6 +789,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
mutex_lock(&clocksource_mutex);
__clocksource_change_rating(cs, rating);
clocksource_select();
+ clocksource_select_watchdog(false);
mutex_unlock(&clocksource_mutex);
}
EXPORT_SYMBOL(clocksource_change_rating);
@@ -767,12 +799,12 @@ EXPORT_SYMBOL(clocksource_change_rating);
*/
static int clocksource_unbind(struct clocksource *cs)
{
- /*
- * I really can't convince myself to support this on hardware
- * designed by lobotomized monkeys.
- */
- if (clocksource_is_watchdog(cs))
- return -EBUSY;
+ if (clocksource_is_watchdog(cs)) {
+ /* Select and try to install a replacement watchdog. */
+ clocksource_select_watchdog(true);
+ if (clocksource_is_watchdog(cs))
+ return -EBUSY;
+ }
if (cs == curr_clocksource) {
/* Select and try to install a replacement clock source */
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index fa909f9fd..fa0b98329 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -515,7 +515,7 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
/*
* High resolution timer enabled ?
*/
-static int hrtimer_hres_enabled __read_mostly = 1;
+static bool hrtimer_hres_enabled __read_mostly = true;
unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC;
EXPORT_SYMBOL_GPL(hrtimer_resolution);
@@ -524,13 +524,7 @@ EXPORT_SYMBOL_GPL(hrtimer_resolution);
*/
static int __init setup_hrtimer_hres(char *str)
{
- if (!strcmp(str, "off"))
- hrtimer_hres_enabled = 0;
- else if (!strcmp(str, "on"))
- hrtimer_hres_enabled = 1;
- else
- return 0;
- return 1;
+ return (kstrtobool(str, &hrtimer_hres_enabled) == 0);
}
__setup("highres=", setup_hrtimer_hres);
@@ -979,7 +973,7 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
* relative (HRTIMER_MODE_REL)
*/
void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
- unsigned long delta_ns, const enum hrtimer_mode mode)
+ u64 delta_ns, const enum hrtimer_mode mode)
{
struct hrtimer_clock_base *base, *new_base;
unsigned long flags;
@@ -1548,7 +1542,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
struct restart_block *restart;
struct hrtimer_sleeper t;
int ret = 0;
- unsigned long slack;
+ u64 slack;
slack = current->timer_slack_ns;
if (dl_task(current) || rt_task(current))
@@ -1724,7 +1718,7 @@ void __init hrtimers_init(void)
* @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
*/
int __sched
-schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
+schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
const enum hrtimer_mode mode, int clock)
{
struct hrtimer_sleeper t;
@@ -1792,7 +1786,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
*
* Returns 0 when the timer has expired otherwise -EINTR
*/
-int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
+int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta,
const enum hrtimer_mode mode)
{
return schedule_hrtimeout_range_clock(expires, delta, mode,
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 347fecf86..555e21f7b 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -68,7 +68,7 @@ static struct clocksource clocksource_jiffies = {
.name = "jiffies",
.rating = 1, /* lowest valid rating*/
.read = jiffies_read,
- .mask = 0xffffffff, /*32bits*/
+ .mask = CLOCKSOURCE_MASK(32),
.mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
.shift = JIFFIES_SHIFT,
.max_cycles = 10,
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 7a48442ed..e284c2316 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -333,7 +333,6 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
return err;
}
-
/*
* Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
* This is called from sys_timer_create() and do_cpu_nanosleep() with the
@@ -517,6 +516,10 @@ static void arm_timer(struct k_itimer *timer)
cputime_expires->sched_exp = exp;
break;
}
+ if (CPUCLOCK_PERTHREAD(timer->it_clock))
+ tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
+ else
+ tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
}
}
@@ -582,39 +585,6 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
return 0;
}
-#ifdef CONFIG_NO_HZ_FULL
-static void nohz_kick_work_fn(struct work_struct *work)
-{
- tick_nohz_full_kick_all();
-}
-
-static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
-
-/*
- * We need the IPIs to be sent from sane process context.
- * The posix cpu timers are always set with irqs disabled.
- */
-static void posix_cpu_timer_kick_nohz(void)
-{
- if (context_tracking_is_enabled())
- schedule_work(&nohz_kick_work);
-}
-
-bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
-{
- if (!task_cputime_zero(&tsk->cputime_expires))
- return false;
-
- /* Check if cputimer is running. This is accessed without locking. */
- if (READ_ONCE(tsk->signal->cputimer.running))
- return false;
-
- return true;
-}
-#else
-static inline void posix_cpu_timer_kick_nohz(void) { }
-#endif
-
/*
* Guts of sys_timer_settime for CPU timers.
* This is called with the timer locked and interrupts disabled.
@@ -761,8 +731,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
sample_to_timespec(timer->it_clock,
old_incr, &old->it_interval);
}
- if (!ret)
- posix_cpu_timer_kick_nohz();
+
return ret;
}
@@ -911,6 +880,8 @@ static void check_thread_timers(struct task_struct *tsk,
__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
}
}
+ if (task_cputime_zero(tsk_expires))
+ tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
}
static inline void stop_process_timers(struct signal_struct *sig)
@@ -919,6 +890,7 @@ static inline void stop_process_timers(struct signal_struct *sig)
/* Turn off cputimer->running. This is done without locking. */
WRITE_ONCE(cputimer->running, false);
+ tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
}
static u32 onecputick;
@@ -1095,8 +1067,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
arm_timer(timer);
unlock_task_sighand(p, &flags);
- /* Kick full dynticks CPUs in case they need to tick on the new timer */
- posix_cpu_timer_kick_nohz();
out:
timer->it_overrun_last = timer->it_overrun;
timer->it_overrun = -1;
@@ -1270,7 +1240,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
}
if (!*newval)
- goto out;
+ return;
*newval += now;
}
@@ -1288,8 +1258,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
tsk->signal->cputime_expires.virt_exp = *newval;
break;
}
-out:
- posix_cpu_timer_kick_nohz();
+
+ tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
}
static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 0b1742434..58e3310c9 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -22,7 +22,6 @@
#include <linux/module.h>
#include <linux/irq_work.h>
#include <linux/posix-timers.h>
-#include <linux/perf_event.h>
#include <linux/context_tracking.h>
#include <asm/irq_regs.h>
@@ -158,54 +157,61 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
cpumask_var_t tick_nohz_full_mask;
cpumask_var_t housekeeping_mask;
bool tick_nohz_full_running;
+static atomic_t tick_dep_mask;
-static bool can_stop_full_tick(void)
+static bool check_tick_dependency(atomic_t *dep)
+{
+ int val = atomic_read(dep);
+
+ if (val & TICK_DEP_MASK_POSIX_TIMER) {
+ trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
+ return true;
+ }
+
+ if (val & TICK_DEP_MASK_PERF_EVENTS) {
+ trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
+ return true;
+ }
+
+ if (val & TICK_DEP_MASK_SCHED) {
+ trace_tick_stop(0, TICK_DEP_MASK_SCHED);
+ return true;
+ }
+
+ if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) {
+ trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
+ return true;
+ }
+
+ return false;
+}
+
+static bool can_stop_full_tick(struct tick_sched *ts)
{
WARN_ON_ONCE(!irqs_disabled());
- if (!sched_can_stop_tick()) {
- trace_tick_stop(0, "more than 1 task in runqueue\n");
+ if (check_tick_dependency(&tick_dep_mask))
return false;
- }
- if (!posix_cpu_timers_can_stop_tick(current)) {
- trace_tick_stop(0, "posix timers running\n");
+ if (check_tick_dependency(&ts->tick_dep_mask))
return false;
- }
- if (!perf_event_can_stop_tick()) {
- trace_tick_stop(0, "perf events running\n");
+ if (check_tick_dependency(&current->tick_dep_mask))
return false;
- }
- /* sched_clock_tick() needs us? */
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
- /*
- * TODO: kick full dynticks CPUs when
- * sched_clock_stable is set.
- */
- if (!sched_clock_stable()) {
- trace_tick_stop(0, "unstable sched clock\n");
- /*
- * Don't allow the user to think they can get
- * full NO_HZ with this machine.
- */
- WARN_ONCE(tick_nohz_full_running,
- "NO_HZ FULL will not work with unstable sched clock");
+ if (check_tick_dependency(&current->signal->tick_dep_mask))
return false;
- }
-#endif
return true;
}
-static void nohz_full_kick_work_func(struct irq_work *work)
+static void nohz_full_kick_func(struct irq_work *work)
{
/* Empty, the tick restart happens on tick_nohz_irq_exit() */
}
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
- .func = nohz_full_kick_work_func,
+ .func = nohz_full_kick_func,
};
/*
@@ -214,7 +220,7 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
* This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
* is NMI safe.
*/
-void tick_nohz_full_kick(void)
+static void tick_nohz_full_kick(void)
{
if (!tick_nohz_full_cpu(smp_processor_id()))
return;
@@ -234,27 +240,112 @@ void tick_nohz_full_kick_cpu(int cpu)
irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
}
-static void nohz_full_kick_ipi(void *info)
-{
- /* Empty, the tick restart happens on tick_nohz_irq_exit() */
-}
-
/*
* Kick all full dynticks CPUs in order to force these to re-evaluate
* their dependency on the tick and restart it if necessary.
*/
-void tick_nohz_full_kick_all(void)
+static void tick_nohz_full_kick_all(void)
{
+ int cpu;
+
if (!tick_nohz_full_running)
return;
preempt_disable();
- smp_call_function_many(tick_nohz_full_mask,
- nohz_full_kick_ipi, NULL, false);
- tick_nohz_full_kick();
+ for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
+ tick_nohz_full_kick_cpu(cpu);
preempt_enable();
}
+static void tick_nohz_dep_set_all(atomic_t *dep,
+ enum tick_dep_bits bit)
+{
+ int prev;
+
+ prev = atomic_fetch_or(dep, BIT(bit));
+ if (!prev)
+ tick_nohz_full_kick_all();
+}
+
+/*
+ * Set a global tick dependency. Used by perf events that rely on freq and
+ * by unstable clock.
+ */
+void tick_nohz_dep_set(enum tick_dep_bits bit)
+{
+ tick_nohz_dep_set_all(&tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear(enum tick_dep_bits bit)
+{
+ atomic_andnot(BIT(bit), &tick_dep_mask);
+}
+
+/*
+ * Set per-CPU tick dependency. Used by scheduler and perf events in order to
+ * manage events throttling.
+ */
+void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
+{
+ int prev;
+ struct tick_sched *ts;
+
+ ts = per_cpu_ptr(&tick_cpu_sched, cpu);
+
+ prev = atomic_fetch_or(&ts->tick_dep_mask, BIT(bit));
+ if (!prev) {
+ preempt_disable();
+ /* Perf needs local kick that is NMI safe */
+ if (cpu == smp_processor_id()) {
+ tick_nohz_full_kick();
+ } else {
+ /* Remote irq work not NMI-safe */
+ if (!WARN_ON_ONCE(in_nmi()))
+ tick_nohz_full_kick_cpu(cpu);
+ }
+ preempt_enable();
+ }
+}
+
+void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
+{
+ struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
+
+ atomic_andnot(BIT(bit), &ts->tick_dep_mask);
+}
+
+/*
+ * Set a per-task tick dependency. Posix CPU timers need this in order to elapse
+ * per task timers.
+ */
+void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
+{
+ /*
+ * We could optimize this with just kicking the target running the task
+ * if that noise matters for nohz full users.
+ */
+ tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
+{
+ atomic_andnot(BIT(bit), &tsk->tick_dep_mask);
+}
+
+/*
+ * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
+ * per process timers.
+ */
+void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+{
+ tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+{
+ atomic_andnot(BIT(bit), &sig->tick_dep_mask);
+}
+
/*
* Re-evaluate the need for the tick as we switch the current task.
* It might need the tick due to per task/process properties:
@@ -263,15 +354,20 @@ void tick_nohz_full_kick_all(void)
void __tick_nohz_task_switch(void)
{
unsigned long flags;
+ struct tick_sched *ts;
local_irq_save(flags);
if (!tick_nohz_full_cpu(smp_processor_id()))
goto out;
- if (tick_nohz_tick_stopped() && !can_stop_full_tick())
- tick_nohz_full_kick();
+ ts = this_cpu_ptr(&tick_cpu_sched);
+ if (ts->tick_stopped) {
+ if (atomic_read(&current->tick_dep_mask) ||
+ atomic_read(&current->signal->tick_dep_mask))
+ tick_nohz_full_kick();
+ }
out:
local_irq_restore(flags);
}
@@ -281,7 +377,7 @@ static int __init tick_nohz_full_setup(char *str)
{
alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
if (cpulist_parse(str, tick_nohz_full_mask) < 0) {
- pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
+ pr_warn("NO_HZ: Incorrect nohz_full cpumask\n");
free_bootmem_cpumask_var(tick_nohz_full_mask);
return 1;
}
@@ -349,8 +445,7 @@ void __init tick_nohz_init(void)
* interrupts to avoid circular dependency on the tick
*/
if (!arch_irq_work_has_interrupt()) {
- pr_warning("NO_HZ: Can't run full dynticks because arch doesn't "
- "support irq work self-IPIs\n");
+ pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support irq work self-IPIs\n");
cpumask_clear(tick_nohz_full_mask);
cpumask_copy(housekeeping_mask, cpu_possible_mask);
tick_nohz_full_running = false;
@@ -360,7 +455,8 @@ void __init tick_nohz_init(void)
cpu = smp_processor_id();
if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
- pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
+ pr_warn("NO_HZ: Clearing %d from nohz_full range for timekeeping\n",
+ cpu);
cpumask_clear_cpu(cpu, tick_nohz_full_mask);
}
@@ -389,20 +485,14 @@ void __init tick_nohz_init(void)
/*
* NO HZ enabled ?
*/
-int tick_nohz_enabled __read_mostly = 1;
+bool tick_nohz_enabled __read_mostly = true;
unsigned long tick_nohz_active __read_mostly;
/*
* Enable / Disable tickless mode
*/
static int __init setup_tick_nohz(char *str)
{
- if (!strcmp(str, "off"))
- tick_nohz_enabled = 0;
- else if (!strcmp(str, "on"))
- tick_nohz_enabled = 1;
- else
- return 0;
- return 1;
+ return (kstrtobool(str, &tick_nohz_enabled) == 0);
}
__setup("nohz=", setup_tick_nohz);
@@ -689,7 +779,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
ts->tick_stopped = 1;
- trace_tick_stop(1, " ");
+ trace_tick_stop(1, TICK_DEP_MASK_NONE);
}
/*
@@ -740,7 +830,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
return;
- if (can_stop_full_tick())
+ if (can_stop_full_tick(ts))
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
else if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index a4a8d4e9b..bf38226e5 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -60,6 +60,7 @@ struct tick_sched {
u64 next_timer;
ktime_t idle_expires;
int do_timer_last;
+ atomic_t tick_dep_mask;
};
extern struct tick_sched *tick_get_tick_sched(int cpu);
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 86751c68e..be115b020 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -322,6 +322,13 @@ EXPORT_SYMBOL(timespec_trunc);
* -year/100+year/400 terms, and add 10.]
*
* This algorithm was first published by Gauss (I think).
+ *
+ * A leap second can be indicated by calling this function with sec as
+ * 60 (allowable under ISO 8601). The leap second is treated the same
+ * as the following second since they don't exist in UNIX time.
+ *
+ * An encoding of midnight at the end of the day as 24:00:00 - ie. midnight
+ * tomorrow - (allowable under ISO 8601) is supported.
*/
time64_t mktime64(const unsigned int year0, const unsigned int mon0,
const unsigned int day, const unsigned int hour,
@@ -338,7 +345,7 @@ time64_t mktime64(const unsigned int year0, const unsigned int mon0,
return ((((time64_t)
(year/4 - year/100 + year/400 + 367*mon/12 + day) +
year*365 - 719499
- )*24 + hour /* now have hours */
+ )*24 + hour /* now have hours - midnight tomorrow handled here */
)*60 + min /* now have minutes */
)*60 + sec; /* finally seconds */
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 34b4cedfa..479d25cd3 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -131,7 +131,7 @@ static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
} else {
if (offset > (max_cycles >> 1)) {
- printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the the '%s' clock's 50%% safety margin (%lld)\n",
+ printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n",
offset, name, max_cycles >> 1);
printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
}
@@ -233,6 +233,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
u64 tmp, ntpinterval;
struct clocksource *old_clock;
+ ++tk->cs_was_changed_seq;
old_clock = tk->tkr_mono.clock;
tk->tkr_mono.clock = clock;
tk->tkr_mono.read = clock->read;
@@ -298,17 +299,34 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset;
static inline u32 arch_gettimeoffset(void) { return 0; }
#endif
+static inline s64 timekeeping_delta_to_ns(struct tk_read_base *tkr,
+ cycle_t delta)
+{
+ s64 nsec;
+
+ nsec = delta * tkr->mult + tkr->xtime_nsec;
+ nsec >>= tkr->shift;
+
+ /* If arch requires, add in get_arch_timeoffset() */
+ return nsec + arch_gettimeoffset();
+}
+
static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
{
cycle_t delta;
- s64 nsec;
delta = timekeeping_get_delta(tkr);
+ return timekeeping_delta_to_ns(tkr, delta);
+}
- nsec = (delta * tkr->mult + tkr->xtime_nsec) >> tkr->shift;
+static inline s64 timekeeping_cycles_to_ns(struct tk_read_base *tkr,
+ cycle_t cycles)
+{
+ cycle_t delta;
- /* If arch requires, add in get_arch_timeoffset() */
- return nsec + arch_gettimeoffset();
+ /* calculate the delta since the last update_wall_time */
+ delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
+ return timekeeping_delta_to_ns(tkr, delta);
}
/**
@@ -857,44 +875,262 @@ time64_t __ktime_get_real_seconds(void)
return tk->xtime_sec;
}
+/**
+ * ktime_get_snapshot - snapshots the realtime/monotonic raw clocks with counter
+ * @systime_snapshot: pointer to struct receiving the system time snapshot
+ */
+void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
+{
+ struct timekeeper *tk = &tk_core.timekeeper;
+ unsigned long seq;
+ ktime_t base_raw;
+ ktime_t base_real;
+ s64 nsec_raw;
+ s64 nsec_real;
+ cycle_t now;
-#ifdef CONFIG_NTP_PPS
+ WARN_ON_ONCE(timekeeping_suspended);
+
+ do {
+ seq = read_seqcount_begin(&tk_core.seq);
+
+ now = tk->tkr_mono.read(tk->tkr_mono.clock);
+ systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
+ systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
+ base_real = ktime_add(tk->tkr_mono.base,
+ tk_core.timekeeper.offs_real);
+ base_raw = tk->tkr_raw.base;
+ nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now);
+ nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
+ } while (read_seqcount_retry(&tk_core.seq, seq));
+
+ systime_snapshot->cycles = now;
+ systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
+ systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
+}
+EXPORT_SYMBOL_GPL(ktime_get_snapshot);
+
+/* Scale base by mult/div checking for overflow */
+static int scale64_check_overflow(u64 mult, u64 div, u64 *base)
+{
+ u64 tmp, rem;
+
+ tmp = div64_u64_rem(*base, div, &rem);
+
+ if (((int)sizeof(u64)*8 - fls64(mult) < fls64(tmp)) ||
+ ((int)sizeof(u64)*8 - fls64(mult) < fls64(rem)))
+ return -EOVERFLOW;
+ tmp *= mult;
+ rem *= mult;
+
+ do_div(rem, div);
+ *base = tmp + rem;
+ return 0;
+}
/**
- * ktime_get_raw_and_real_ts64 - get day and raw monotonic time in timespec format
- * @ts_raw: pointer to the timespec to be set to raw monotonic time
- * @ts_real: pointer to the timespec to be set to the time of day
+ * adjust_historical_crosststamp - adjust crosstimestamp previous to current interval
+ * @history: Snapshot representing start of history
+ * @partial_history_cycles: Cycle offset into history (fractional part)
+ * @total_history_cycles: Total history length in cycles
+ * @discontinuity: True indicates clock was set on history period
+ * @ts: Cross timestamp that should be adjusted using
+ * partial/total ratio
*
- * This function reads both the time of day and raw monotonic time at the
- * same time atomically and stores the resulting timestamps in timespec
- * format.
+ * Helper function used by get_device_system_crosststamp() to correct the
+ * crosstimestamp corresponding to the start of the current interval to the
+ * system counter value (timestamp point) provided by the driver. The
+ * total_history_* quantities are the total history starting at the provided
+ * reference point and ending at the start of the current interval. The cycle
+ * count between the driver timestamp point and the start of the current
+ * interval is partial_history_cycles.
*/
-void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, struct timespec64 *ts_real)
+static int adjust_historical_crosststamp(struct system_time_snapshot *history,
+ cycle_t partial_history_cycles,
+ cycle_t total_history_cycles,
+ bool discontinuity,
+ struct system_device_crosststamp *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
- unsigned long seq;
- s64 nsecs_raw, nsecs_real;
+ u64 corr_raw, corr_real;
+ bool interp_forward;
+ int ret;
- WARN_ON_ONCE(timekeeping_suspended);
+ if (total_history_cycles == 0 || partial_history_cycles == 0)
+ return 0;
+
+ /* Interpolate shortest distance from beginning or end of history */
+ interp_forward = partial_history_cycles > total_history_cycles/2 ?
+ true : false;
+ partial_history_cycles = interp_forward ?
+ total_history_cycles - partial_history_cycles :
+ partial_history_cycles;
+
+ /*
+ * Scale the monotonic raw time delta by:
+ * partial_history_cycles / total_history_cycles
+ */
+ corr_raw = (u64)ktime_to_ns(
+ ktime_sub(ts->sys_monoraw, history->raw));
+ ret = scale64_check_overflow(partial_history_cycles,
+ total_history_cycles, &corr_raw);
+ if (ret)
+ return ret;
+
+ /*
+ * If there is a discontinuity in the history, scale monotonic raw
+ * correction by:
+ * mult(real)/mult(raw) yielding the realtime correction
+ * Otherwise, calculate the realtime correction similar to monotonic
+ * raw calculation
+ */
+ if (discontinuity) {
+ corr_real = mul_u64_u32_div
+ (corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult);
+ } else {
+ corr_real = (u64)ktime_to_ns(
+ ktime_sub(ts->sys_realtime, history->real));
+ ret = scale64_check_overflow(partial_history_cycles,
+ total_history_cycles, &corr_real);
+ if (ret)
+ return ret;
+ }
+
+ /* Fixup monotonic raw and real time time values */
+ if (interp_forward) {
+ ts->sys_monoraw = ktime_add_ns(history->raw, corr_raw);
+ ts->sys_realtime = ktime_add_ns(history->real, corr_real);
+ } else {
+ ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw);
+ ts->sys_realtime = ktime_sub_ns(ts->sys_realtime, corr_real);
+ }
+
+ return 0;
+}
+
+/*
+ * cycle_between - true if test occurs chronologically between before and after
+ */
+static bool cycle_between(cycle_t before, cycle_t test, cycle_t after)
+{
+ if (test > before && test < after)
+ return true;
+ if (test < before && before > after)
+ return true;
+ return false;
+}
+
+/**
+ * get_device_system_crosststamp - Synchronously capture system/device timestamp
+ * @get_time_fn: Callback to get simultaneous device time and
+ * system counter from the device driver
+ * @ctx: Context passed to get_time_fn()
+ * @history_begin: Historical reference point used to interpolate system
+ * time when counter provided by the driver is before the current interval
+ * @xtstamp: Receives simultaneously captured system and device time
+ *
+ * Reads a timestamp from a device and correlates it to system time
+ */
+int get_device_system_crosststamp(int (*get_time_fn)
+ (ktime_t *device_time,
+ struct system_counterval_t *sys_counterval,
+ void *ctx),
+ void *ctx,
+ struct system_time_snapshot *history_begin,
+ struct system_device_crosststamp *xtstamp)
+{
+ struct system_counterval_t system_counterval;
+ struct timekeeper *tk = &tk_core.timekeeper;
+ cycle_t cycles, now, interval_start;
+ unsigned int clock_was_set_seq = 0;
+ ktime_t base_real, base_raw;
+ s64 nsec_real, nsec_raw;
+ u8 cs_was_changed_seq;
+ unsigned long seq;
+ bool do_interp;
+ int ret;
do {
seq = read_seqcount_begin(&tk_core.seq);
+ /*
+ * Try to synchronously capture device time and a system
+ * counter value calling back into the device driver
+ */
+ ret = get_time_fn(&xtstamp->device, &system_counterval, ctx);
+ if (ret)
+ return ret;
+
+ /*
+ * Verify that the clocksource associated with the captured
+ * system counter value is the same as the currently installed
+ * timekeeper clocksource
+ */
+ if (tk->tkr_mono.clock != system_counterval.cs)
+ return -ENODEV;
+ cycles = system_counterval.cycles;
- *ts_raw = tk->raw_time;
- ts_real->tv_sec = tk->xtime_sec;
- ts_real->tv_nsec = 0;
+ /*
+ * Check whether the system counter value provided by the
+ * device driver is on the current timekeeping interval.
+ */
+ now = tk->tkr_mono.read(tk->tkr_mono.clock);
+ interval_start = tk->tkr_mono.cycle_last;
+ if (!cycle_between(interval_start, cycles, now)) {
+ clock_was_set_seq = tk->clock_was_set_seq;
+ cs_was_changed_seq = tk->cs_was_changed_seq;
+ cycles = interval_start;
+ do_interp = true;
+ } else {
+ do_interp = false;
+ }
- nsecs_raw = timekeeping_get_ns(&tk->tkr_raw);
- nsecs_real = timekeeping_get_ns(&tk->tkr_mono);
+ base_real = ktime_add(tk->tkr_mono.base,
+ tk_core.timekeeper.offs_real);
+ base_raw = tk->tkr_raw.base;
+ nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono,
+ system_counterval.cycles);
+ nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw,
+ system_counterval.cycles);
} while (read_seqcount_retry(&tk_core.seq, seq));
- timespec64_add_ns(ts_raw, nsecs_raw);
- timespec64_add_ns(ts_real, nsecs_real);
-}
-EXPORT_SYMBOL(ktime_get_raw_and_real_ts64);
+ xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real);
+ xtstamp->sys_monoraw = ktime_add_ns(base_raw, nsec_raw);
-#endif /* CONFIG_NTP_PPS */
+ /*
+ * Interpolate if necessary, adjusting back from the start of the
+ * current interval
+ */
+ if (do_interp) {
+ cycle_t partial_history_cycles, total_history_cycles;
+ bool discontinuity;
+
+ /*
+ * Check that the counter value occurs after the provided
+ * history reference and that the history doesn't cross a
+ * clocksource change
+ */
+ if (!history_begin ||
+ !cycle_between(history_begin->cycles,
+ system_counterval.cycles, cycles) ||
+ history_begin->cs_was_changed_seq != cs_was_changed_seq)
+ return -EINVAL;
+ partial_history_cycles = cycles - system_counterval.cycles;
+ total_history_cycles = cycles - history_begin->cycles;
+ discontinuity =
+ history_begin->clock_was_set_seq != clock_was_set_seq;
+
+ ret = adjust_historical_crosststamp(history_begin,
+ partial_history_cycles,
+ total_history_cycles,
+ discontinuity, xtstamp);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(get_device_system_crosststamp);
/**
* do_gettimeofday - Returns the time of day in a timeval
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index bbc5d1114..73164c3aa 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1566,6 +1566,17 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
}
EXPORT_SYMBOL(schedule_timeout_uninterruptible);
+/*
+ * Like schedule_timeout_uninterruptible(), except this task will not contribute
+ * to load average.
+ */
+signed long __sched schedule_timeout_idle(signed long timeout)
+{
+ __set_current_state(TASK_IDLE);
+ return schedule_timeout(timeout);
+}
+EXPORT_SYMBOL(schedule_timeout_idle);
+
#ifdef CONFIG_HOTPLUG_CPU
static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head)
{
@@ -1698,10 +1709,10 @@ EXPORT_SYMBOL(msleep_interruptible);
static void __sched do_usleep_range(unsigned long min, unsigned long max)
{
ktime_t kmin;
- unsigned long delta;
+ u64 delta;
kmin = ktime_set(0, min * NSEC_PER_USEC);
- delta = (max - min) * NSEC_PER_USEC;
+ delta = (u64)(max - min) * NSEC_PER_USEC;
schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
}