summaryrefslogtreecommitdiff
path: root/kernel/locking
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/locking')
-rw-r--r--kernel/locking/lockdep.c4
-rw-r--r--kernel/locking/lockdep_proc.c2
-rw-r--r--kernel/locking/locktorture.c164
-rw-r--r--kernel/locking/mcs_spinlock.h4
-rw-r--r--kernel/locking/mutex.c9
-rw-r--r--kernel/locking/osq_lock.c11
-rw-r--r--kernel/locking/percpu-rwsem.c90
-rw-r--r--kernel/locking/qrwlock.c8
-rw-r--r--kernel/locking/qspinlock_paravirt.h6
-rw-r--r--kernel/locking/rtmutex.c33
-rw-r--r--kernel/locking/rwsem-xadd.c5
11 files changed, 244 insertions, 92 deletions
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 4e49cc4c9..60ace5661 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -6,7 +6,7 @@
* Started by Ingo Molnar:
*
* Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
*
* this code maps all the lock dependencies as they occur in a live kernel
* and will warn about the following classes of locking bugs:
@@ -2738,7 +2738,7 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
return;
/* no reclaim without waiting on it */
- if (!(gfp_mask & __GFP_WAIT))
+ if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
return;
/* this guy won't enter reclaim */
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index d83d798be..dbb61a302 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -6,7 +6,7 @@
* Started by Ingo Molnar:
*
* Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
*
* Code for /proc/lockdep and /proc/lockdep_stats:
*
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 32244186f..8ef1919d6 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -17,12 +17,14 @@
*
* Copyright (C) IBM Corporation, 2014
*
- * Author: Paul E. McKenney <paulmck@us.ibm.com>
+ * Authors: Paul E. McKenney <paulmck@us.ibm.com>
+ * Davidlohr Bueso <dave@stgolabs.net>
* Based on kernel/rcu/torture.c.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kthread.h>
+#include <linux/sched/rt.h>
#include <linux/spinlock.h>
#include <linux/rwlock.h>
#include <linux/mutex.h>
@@ -34,6 +36,7 @@
#include <linux/moduleparam.h>
#include <linux/delay.h>
#include <linux/slab.h>
+#include <linux/percpu-rwsem.h>
#include <linux/torture.h>
MODULE_LICENSE("GPL");
@@ -91,11 +94,13 @@ struct lock_torture_ops {
void (*init)(void);
int (*writelock)(void);
void (*write_delay)(struct torture_random_state *trsp);
+ void (*task_boost)(struct torture_random_state *trsp);
void (*writeunlock)(void);
int (*readlock)(void);
void (*read_delay)(struct torture_random_state *trsp);
void (*readunlock)(void);
- unsigned long flags;
+
+ unsigned long flags; /* for irq spinlocks */
const char *name;
};
@@ -139,9 +144,15 @@ static void torture_lock_busted_write_unlock(void)
/* BUGGY, do not use in real life!!! */
}
+static void torture_boost_dummy(struct torture_random_state *trsp)
+{
+ /* Only rtmutexes care about priority */
+}
+
static struct lock_torture_ops lock_busted_ops = {
.writelock = torture_lock_busted_write_lock,
.write_delay = torture_lock_busted_write_delay,
+ .task_boost = torture_boost_dummy,
.writeunlock = torture_lock_busted_write_unlock,
.readlock = NULL,
.read_delay = NULL,
@@ -185,6 +196,7 @@ static void torture_spin_lock_write_unlock(void) __releases(torture_spinlock)
static struct lock_torture_ops spin_lock_ops = {
.writelock = torture_spin_lock_write_lock,
.write_delay = torture_spin_lock_write_delay,
+ .task_boost = torture_boost_dummy,
.writeunlock = torture_spin_lock_write_unlock,
.readlock = NULL,
.read_delay = NULL,
@@ -211,6 +223,7 @@ __releases(torture_spinlock)
static struct lock_torture_ops spin_lock_irq_ops = {
.writelock = torture_spin_lock_write_lock_irq,
.write_delay = torture_spin_lock_write_delay,
+ .task_boost = torture_boost_dummy,
.writeunlock = torture_lock_spin_write_unlock_irq,
.readlock = NULL,
.read_delay = NULL,
@@ -275,6 +288,7 @@ static void torture_rwlock_read_unlock(void) __releases(torture_rwlock)
static struct lock_torture_ops rw_lock_ops = {
.writelock = torture_rwlock_write_lock,
.write_delay = torture_rwlock_write_delay,
+ .task_boost = torture_boost_dummy,
.writeunlock = torture_rwlock_write_unlock,
.readlock = torture_rwlock_read_lock,
.read_delay = torture_rwlock_read_delay,
@@ -315,6 +329,7 @@ __releases(torture_rwlock)
static struct lock_torture_ops rw_lock_irq_ops = {
.writelock = torture_rwlock_write_lock_irq,
.write_delay = torture_rwlock_write_delay,
+ .task_boost = torture_boost_dummy,
.writeunlock = torture_rwlock_write_unlock_irq,
.readlock = torture_rwlock_read_lock_irq,
.read_delay = torture_rwlock_read_delay,
@@ -354,6 +369,7 @@ static void torture_mutex_unlock(void) __releases(torture_mutex)
static struct lock_torture_ops mutex_lock_ops = {
.writelock = torture_mutex_lock,
.write_delay = torture_mutex_delay,
+ .task_boost = torture_boost_dummy,
.writeunlock = torture_mutex_unlock,
.readlock = NULL,
.read_delay = NULL,
@@ -361,6 +377,90 @@ static struct lock_torture_ops mutex_lock_ops = {
.name = "mutex_lock"
};
+#ifdef CONFIG_RT_MUTEXES
+static DEFINE_RT_MUTEX(torture_rtmutex);
+
+static int torture_rtmutex_lock(void) __acquires(torture_rtmutex)
+{
+ rt_mutex_lock(&torture_rtmutex);
+ return 0;
+}
+
+static void torture_rtmutex_boost(struct torture_random_state *trsp)
+{
+ int policy;
+ struct sched_param param;
+ const unsigned int factor = 50000; /* yes, quite arbitrary */
+
+ if (!rt_task(current)) {
+ /*
+ * (1) Boost priority once every ~50k operations. When the
+ * task tries to take the lock, the rtmutex it will account
+ * for the new priority, and do any corresponding pi-dance.
+ */
+ if (!(torture_random(trsp) %
+ (cxt.nrealwriters_stress * factor))) {
+ policy = SCHED_FIFO;
+ param.sched_priority = MAX_RT_PRIO - 1;
+ } else /* common case, do nothing */
+ return;
+ } else {
+ /*
+ * The task will remain boosted for another ~500k operations,
+ * then restored back to its original prio, and so forth.
+ *
+ * When @trsp is nil, we want to force-reset the task for
+ * stopping the kthread.
+ */
+ if (!trsp || !(torture_random(trsp) %
+ (cxt.nrealwriters_stress * factor * 2))) {
+ policy = SCHED_NORMAL;
+ param.sched_priority = 0;
+ } else /* common case, do nothing */
+ return;
+ }
+
+ sched_setscheduler_nocheck(current, policy, &param);
+}
+
+static void torture_rtmutex_delay(struct torture_random_state *trsp)
+{
+ const unsigned long shortdelay_us = 2;
+ const unsigned long longdelay_ms = 100;
+
+ /*
+ * We want a short delay mostly to emulate likely code, and
+ * we want a long delay occasionally to force massive contention.
+ */
+ if (!(torture_random(trsp) %
+ (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
+ mdelay(longdelay_ms);
+ if (!(torture_random(trsp) %
+ (cxt.nrealwriters_stress * 2 * shortdelay_us)))
+ udelay(shortdelay_us);
+#ifdef CONFIG_PREEMPT
+ if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
+ preempt_schedule(); /* Allow test to be preempted. */
+#endif
+}
+
+static void torture_rtmutex_unlock(void) __releases(torture_rtmutex)
+{
+ rt_mutex_unlock(&torture_rtmutex);
+}
+
+static struct lock_torture_ops rtmutex_lock_ops = {
+ .writelock = torture_rtmutex_lock,
+ .write_delay = torture_rtmutex_delay,
+ .task_boost = torture_rtmutex_boost,
+ .writeunlock = torture_rtmutex_unlock,
+ .readlock = NULL,
+ .read_delay = NULL,
+ .readunlock = NULL,
+ .name = "rtmutex_lock"
+};
+#endif
+
static DECLARE_RWSEM(torture_rwsem);
static int torture_rwsem_down_write(void) __acquires(torture_rwsem)
{
@@ -419,6 +519,7 @@ static void torture_rwsem_up_read(void) __releases(torture_rwsem)
static struct lock_torture_ops rwsem_lock_ops = {
.writelock = torture_rwsem_down_write,
.write_delay = torture_rwsem_write_delay,
+ .task_boost = torture_boost_dummy,
.writeunlock = torture_rwsem_up_write,
.readlock = torture_rwsem_down_read,
.read_delay = torture_rwsem_read_delay,
@@ -426,6 +527,48 @@ static struct lock_torture_ops rwsem_lock_ops = {
.name = "rwsem_lock"
};
+#include <linux/percpu-rwsem.h>
+static struct percpu_rw_semaphore pcpu_rwsem;
+
+void torture_percpu_rwsem_init(void)
+{
+ BUG_ON(percpu_init_rwsem(&pcpu_rwsem));
+}
+
+static int torture_percpu_rwsem_down_write(void) __acquires(pcpu_rwsem)
+{
+ percpu_down_write(&pcpu_rwsem);
+ return 0;
+}
+
+static void torture_percpu_rwsem_up_write(void) __releases(pcpu_rwsem)
+{
+ percpu_up_write(&pcpu_rwsem);
+}
+
+static int torture_percpu_rwsem_down_read(void) __acquires(pcpu_rwsem)
+{
+ percpu_down_read(&pcpu_rwsem);
+ return 0;
+}
+
+static void torture_percpu_rwsem_up_read(void) __releases(pcpu_rwsem)
+{
+ percpu_up_read(&pcpu_rwsem);
+}
+
+static struct lock_torture_ops percpu_rwsem_lock_ops = {
+ .init = torture_percpu_rwsem_init,
+ .writelock = torture_percpu_rwsem_down_write,
+ .write_delay = torture_rwsem_write_delay,
+ .task_boost = torture_boost_dummy,
+ .writeunlock = torture_percpu_rwsem_up_write,
+ .readlock = torture_percpu_rwsem_down_read,
+ .read_delay = torture_rwsem_read_delay,
+ .readunlock = torture_percpu_rwsem_up_read,
+ .name = "percpu_rwsem_lock"
+};
+
/*
* Lock torture writer kthread. Repeatedly acquires and releases
* the lock, checking for duplicate acquisitions.
@@ -442,6 +585,7 @@ static int lock_torture_writer(void *arg)
if ((torture_random(&rand) & 0xfffff) == 0)
schedule_timeout_uninterruptible(1);
+ cxt.cur_ops->task_boost(&rand);
cxt.cur_ops->writelock();
if (WARN_ON_ONCE(lock_is_write_held))
lwsp->n_lock_fail++;
@@ -456,6 +600,8 @@ static int lock_torture_writer(void *arg)
stutter_wait("lock_torture_writer");
} while (!torture_must_stop());
+
+ cxt.cur_ops->task_boost(NULL); /* reset prio */
torture_kthread_stopping("lock_torture_writer");
return 0;
}
@@ -642,7 +788,11 @@ static int __init lock_torture_init(void)
&spin_lock_ops, &spin_lock_irq_ops,
&rw_lock_ops, &rw_lock_irq_ops,
&mutex_lock_ops,
+#ifdef CONFIG_RT_MUTEXES
+ &rtmutex_lock_ops,
+#endif
&rwsem_lock_ops,
+ &percpu_rwsem_lock_ops,
};
if (!torture_init_begin(torture_type, verbose, &torture_runnable))
@@ -661,11 +811,11 @@ static int __init lock_torture_init(void)
for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
pr_alert(" %s", torture_ops[i]->name);
pr_alert("\n");
- torture_init_end();
- return -EINVAL;
+ firsterr = -EINVAL;
+ goto unwind;
}
if (cxt.cur_ops->init)
- cxt.cur_ops->init(); /* no "goto unwind" prior to this point!!! */
+ cxt.cur_ops->init();
if (nwriters_stress >= 0)
cxt.nrealwriters_stress = nwriters_stress;
@@ -676,6 +826,10 @@ static int __init lock_torture_init(void)
if (strncmp(torture_type, "mutex", 5) == 0)
cxt.debug_lock = true;
#endif
+#ifdef CONFIG_DEBUG_RT_MUTEXES
+ if (strncmp(torture_type, "rtmutex", 7) == 0)
+ cxt.debug_lock = true;
+#endif
#ifdef CONFIG_DEBUG_SPINLOCK
if ((strncmp(torture_type, "spin", 4) == 0) ||
(strncmp(torture_type, "rw_lock", 7) == 0))
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index fd91aaa45..5b9102a47 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -67,7 +67,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
node->locked = 0;
node->next = NULL;
- prev = xchg(lock, node);
+ prev = xchg_acquire(lock, node);
if (likely(prev == NULL)) {
/*
* Lock acquired, don't need to set node->locked to 1. Threads
@@ -98,7 +98,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
/*
* Release the lock by setting it to NULL
*/
- if (likely(cmpxchg(lock, node, NULL) == node))
+ if (likely(cmpxchg_release(lock, node, NULL) == node))
return;
/* Wait until the next pointer is set */
while (!(next = READ_ONCE(node->next)))
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 4cccea6b8..0551c219c 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -277,7 +277,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
static inline bool mutex_try_to_acquire(struct mutex *lock)
{
return !mutex_is_locked(lock) &&
- (atomic_cmpxchg(&lock->count, 1, 0) == 1);
+ (atomic_cmpxchg_acquire(&lock->count, 1, 0) == 1);
}
/*
@@ -529,7 +529,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* Once more, try to acquire the lock. Only try-lock the mutex if
* it is unlocked to reduce unnecessary xchg() operations.
*/
- if (!mutex_is_locked(lock) && (atomic_xchg(&lock->count, 0) == 1))
+ if (!mutex_is_locked(lock) &&
+ (atomic_xchg_acquire(&lock->count, 0) == 1))
goto skip_wait;
debug_mutex_lock_common(lock, &waiter);
@@ -553,7 +554,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* non-negative in order to avoid unnecessary xchg operations:
*/
if (atomic_read(&lock->count) >= 0 &&
- (atomic_xchg(&lock->count, -1) == 1))
+ (atomic_xchg_acquire(&lock->count, -1) == 1))
break;
/*
@@ -867,7 +868,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
spin_lock_mutex(&lock->wait_lock, flags);
- prev = atomic_xchg(&lock->count, -1);
+ prev = atomic_xchg_acquire(&lock->count, -1);
if (likely(prev == 1)) {
mutex_set_owner(lock);
mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index dc85ee23a..05a37857a 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -50,7 +50,7 @@ osq_wait_next(struct optimistic_spin_queue *lock,
for (;;) {
if (atomic_read(&lock->tail) == curr &&
- atomic_cmpxchg(&lock->tail, curr, old) == curr) {
+ atomic_cmpxchg_acquire(&lock->tail, curr, old) == curr) {
/*
* We were the last queued, we moved @lock back. @prev
* will now observe @lock and will complete its
@@ -92,6 +92,12 @@ bool osq_lock(struct optimistic_spin_queue *lock)
node->next = NULL;
node->cpu = curr;
+ /*
+ * We need both ACQUIRE (pairs with corresponding RELEASE in
+ * unlock() uncontended, or fastpath) and RELEASE (to publish
+ * the node fields we just initialised) semantics when updating
+ * the lock tail.
+ */
old = atomic_xchg(&lock->tail, curr);
if (old == OSQ_UNLOCKED_VAL)
return true;
@@ -184,7 +190,8 @@ void osq_unlock(struct optimistic_spin_queue *lock)
/*
* Fast path for the uncontended case.
*/
- if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr))
+ if (likely(atomic_cmpxchg_release(&lock->tail, curr,
+ OSQ_UNLOCKED_VAL) == curr))
return;
/*
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index f32567254..f231e0bb3 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -17,50 +17,43 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
__init_rwsem(&brw->rw_sem, name, rwsem_key);
- atomic_set(&brw->write_ctr, 0);
+ rcu_sync_init(&brw->rss, RCU_SCHED_SYNC);
atomic_set(&brw->slow_read_ctr, 0);
init_waitqueue_head(&brw->write_waitq);
return 0;
}
+EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
{
+ /*
+ * XXX: temporary kludge. The error path in alloc_super()
+ * assumes that percpu_free_rwsem() is safe after kzalloc().
+ */
+ if (!brw->fast_read_ctr)
+ return;
+
+ rcu_sync_dtor(&brw->rss);
free_percpu(brw->fast_read_ctr);
brw->fast_read_ctr = NULL; /* catch use after free bugs */
}
/*
- * This is the fast-path for down_read/up_read, it only needs to ensure
- * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
- * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
- * serialize with the preempt-disabled section below.
- *
- * The nontrivial part is that we should guarantee acquire/release semantics
- * in case when
- *
- * R_W: down_write() comes after up_read(), the writer should see all
- * changes done by the reader
- * or
- * W_R: down_read() comes after up_write(), the reader should see all
- * changes done by the writer
+ * This is the fast-path for down_read/up_read. If it succeeds we rely
+ * on the barriers provided by rcu_sync_enter/exit; see the comments in
+ * percpu_down_write() and percpu_up_write().
*
* If this helper fails the callers rely on the normal rw_semaphore and
* atomic_dec_and_test(), so in this case we have the necessary barriers.
- *
- * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or
- * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
- * reader inside the critical section. See the comments in down_write and
- * up_write below.
*/
static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
{
- bool success = false;
+ bool success;
preempt_disable();
- if (likely(!atomic_read(&brw->write_ctr))) {
+ success = rcu_sync_is_idle(&brw->rss);
+ if (likely(success))
__this_cpu_add(*brw->fast_read_ctr, val);
- success = true;
- }
preempt_enable();
return success;
@@ -77,16 +70,17 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
void percpu_down_read(struct percpu_rw_semaphore *brw)
{
might_sleep();
- if (likely(update_fast_ctr(brw, +1))) {
- rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
+ rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
+
+ if (likely(update_fast_ctr(brw, +1)))
return;
- }
- down_read(&brw->rw_sem);
+ /* Avoid rwsem_acquire_read() and rwsem_release() */
+ __down_read(&brw->rw_sem);
atomic_inc(&brw->slow_read_ctr);
- /* avoid up_read()->rwsem_release() */
__up_read(&brw->rw_sem);
}
+EXPORT_SYMBOL_GPL(percpu_down_read);
int percpu_down_read_trylock(struct percpu_rw_semaphore *brw)
{
@@ -112,6 +106,7 @@ void percpu_up_read(struct percpu_rw_semaphore *brw)
if (atomic_dec_and_test(&brw->slow_read_ctr))
wake_up_all(&brw->write_waitq);
}
+EXPORT_SYMBOL_GPL(percpu_up_read);
static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
{
@@ -126,33 +121,17 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
return sum;
}
-/*
- * A writer increments ->write_ctr to force the readers to switch to the
- * slow mode, note the atomic_read() check in update_fast_ctr().
- *
- * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
- * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
- * counter it represents the number of active readers.
- *
- * Finally the writer takes ->rw_sem for writing and blocks the new readers,
- * then waits until the slow counter becomes zero.
- */
void percpu_down_write(struct percpu_rw_semaphore *brw)
{
- /* tell update_fast_ctr() there is a pending writer */
- atomic_inc(&brw->write_ctr);
/*
- * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
- * so that update_fast_ctr() can't succeed.
- *
- * 2. Ensures we see the result of every previous this_cpu_add() in
- * update_fast_ctr().
+ * Make rcu_sync_is_idle() == F and thus disable the fast-path in
+ * percpu_down_read() and percpu_up_read(), and wait for gp pass.
*
- * 3. Ensures that if any reader has exited its critical section via
- * fast-path, it executes a full memory barrier before we return.
- * See R_W case in the comment above update_fast_ctr().
+ * The latter synchronises us with the preceding readers which used
+ * the fast-past, so we can not miss the result of __this_cpu_add()
+ * or anything else inside their criticial sections.
*/
- synchronize_sched_expedited();
+ rcu_sync_enter(&brw->rss);
/* exclude other writers, and block the new readers completely */
down_write(&brw->rw_sem);
@@ -163,16 +142,17 @@ void percpu_down_write(struct percpu_rw_semaphore *brw)
/* wait for all readers to complete their percpu_up_read() */
wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
}
+EXPORT_SYMBOL_GPL(percpu_down_write);
void percpu_up_write(struct percpu_rw_semaphore *brw)
{
/* release the lock, but the readers can't use the fast-path */
up_write(&brw->rw_sem);
/*
- * Insert the barrier before the next fast-path in down_read,
- * see W_R case in the comment above update_fast_ctr().
+ * Enable the fast-path in percpu_down_read() and percpu_up_read()
+ * but only after another gp pass; this adds the necessary barrier
+ * to ensure the reader can't miss the changes done by us.
*/
- synchronize_sched_expedited();
- /* the last writer unblocks update_fast_ctr() */
- atomic_dec(&brw->write_ctr);
+ rcu_sync_exit(&brw->rss);
}
+EXPORT_SYMBOL_GPL(percpu_up_write);
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index f17a3e3b3..fec082338 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -86,7 +86,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
/*
* Put the reader into the wait queue
*/
- arch_spin_lock(&lock->lock);
+ arch_spin_lock(&lock->wait_lock);
/*
* The ACQUIRE semantics of the following spinning code ensure
@@ -99,7 +99,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
/*
* Signal the next one in queue to become queue head
*/
- arch_spin_unlock(&lock->lock);
+ arch_spin_unlock(&lock->wait_lock);
}
EXPORT_SYMBOL(queued_read_lock_slowpath);
@@ -112,7 +112,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
u32 cnts;
/* Put the writer into the wait queue */
- arch_spin_lock(&lock->lock);
+ arch_spin_lock(&lock->wait_lock);
/* Try to acquire the lock directly if no reader is present */
if (!atomic_read(&lock->cnts) &&
@@ -144,6 +144,6 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
cpu_relax_lowlatency();
}
unlock:
- arch_spin_unlock(&lock->lock);
+ arch_spin_unlock(&lock->wait_lock);
}
EXPORT_SYMBOL(queued_write_lock_slowpath);
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index c8e6e9a59..f0450ff48 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -267,7 +267,6 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
}
if (!lp) { /* ONCE */
- WRITE_ONCE(pn->state, vcpu_hashed);
lp = pv_hash(lock, pn);
/*
@@ -275,11 +274,9 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
* when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock()
* we'll be sure to be able to observe our hash entry.
*
- * [S] pn->state
* [S] <hash> [Rmw] l->locked == _Q_SLOW_VAL
* MB RMB
* [RmW] l->locked = _Q_SLOW_VAL [L] <unhash>
- * [L] pn->state
*
* Matches the smp_rmb() in __pv_queued_spin_unlock().
*/
@@ -364,8 +361,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
* vCPU is harmless other than the additional latency in completing
* the unlock.
*/
- if (READ_ONCE(node->state) == vcpu_hashed)
- pv_kick(node->cpu);
+ pv_kick(node->cpu);
}
/*
* Include the architecture specific callee-save thunk of the
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 7781d8012..8251e75dd 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -74,14 +74,23 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
* set up.
*/
#ifndef CONFIG_DEBUG_RT_MUTEXES
-# define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c)
+# define rt_mutex_cmpxchg_relaxed(l,c,n) (cmpxchg_relaxed(&l->owner, c, n) == c)
+# define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&l->owner, c, n) == c)
+# define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&l->owner, c, n) == c)
+
+/*
+ * Callers must hold the ->wait_lock -- which is the whole purpose as we force
+ * all future threads that attempt to [Rmw] the lock to the slowpath. As such
+ * relaxed semantics suffice.
+ */
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
unsigned long owner, *p = (unsigned long *) &lock->owner;
do {
owner = *p;
- } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
+ } while (cmpxchg_relaxed(p, owner,
+ owner | RT_MUTEX_HAS_WAITERS) != owner);
}
/*
@@ -121,11 +130,14 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
* lock(wait_lock);
* acquire(lock);
*/
- return rt_mutex_cmpxchg(lock, owner, NULL);
+ return rt_mutex_cmpxchg_release(lock, owner, NULL);
}
#else
-# define rt_mutex_cmpxchg(l,c,n) (0)
+# define rt_mutex_cmpxchg_relaxed(l,c,n) (0)
+# define rt_mutex_cmpxchg_acquire(l,c,n) (0)
+# define rt_mutex_cmpxchg_release(l,c,n) (0)
+
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
lock->owner = (struct task_struct *)
@@ -158,7 +170,8 @@ rt_mutex_waiter_less(struct rt_mutex_waiter *left,
* then right waiter has a dl_prio() too.
*/
if (dl_prio(left->prio))
- return (left->task->dl.deadline < right->task->dl.deadline);
+ return dl_time_before(left->task->dl.deadline,
+ right->task->dl.deadline);
return 0;
}
@@ -1321,7 +1334,7 @@ rt_mutex_fastlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
enum rtmutex_chainwalk chwalk))
{
- if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
+ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
return 0;
} else
@@ -1337,7 +1350,7 @@ rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
enum rtmutex_chainwalk chwalk))
{
if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
- likely(rt_mutex_cmpxchg(lock, NULL, current))) {
+ likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
return 0;
} else
@@ -1348,7 +1361,7 @@ static inline int
rt_mutex_fasttrylock(struct rt_mutex *lock,
int (*slowfn)(struct rt_mutex *lock))
{
- if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
+ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
return 1;
}
@@ -1362,7 +1375,7 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
{
WAKE_Q(wake_q);
- if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
+ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
rt_mutex_deadlock_account_unlock(current);
} else {
@@ -1484,7 +1497,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_unlock);
bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
struct wake_q_head *wqh)
{
- if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
+ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
rt_mutex_deadlock_account_unlock(current);
return false;
}
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 0f189714e..a4d4de05b 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -262,7 +262,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
* to reduce unnecessary expensive cmpxchg() operations.
*/
if (count == RWSEM_WAITING_BIAS &&
- cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
+ cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS,
RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
if (!list_is_singular(&sem->wait_list))
rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
@@ -285,7 +285,8 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
if (!(count == 0 || count == RWSEM_WAITING_BIAS))
return false;
- old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
+ old = cmpxchg_acquire(&sem->count, count,
+ count + RWSEM_ACTIVE_WRITE_BIAS);
if (old == count) {
rwsem_set_owner(sem);
return true;