summaryrefslogtreecommitdiff
path: root/kernel/rcu/tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r--kernel/rcu/tree.c337
1 files changed, 183 insertions, 154 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f07343b54..9fd5b628a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -68,10 +68,6 @@ MODULE_ALIAS("rcutree");
/* Data structures. */
-static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
-static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
-static struct lock_class_key rcu_exp_class[RCU_NUM_LVLS];
-
/*
* In order to export the rcu_state name to the tracing tools, it
* needs to be added in the __tracepoint_string section.
@@ -246,24 +242,17 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
*/
void rcu_sched_qs(void)
{
- unsigned long flags;
-
- if (__this_cpu_read(rcu_sched_data.cpu_no_qs.s)) {
- trace_rcu_grace_period(TPS("rcu_sched"),
- __this_cpu_read(rcu_sched_data.gpnum),
- TPS("cpuqs"));
- __this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false);
- if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
- return;
- local_irq_save(flags);
- if (__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)) {
- __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false);
- rcu_report_exp_rdp(&rcu_sched_state,
- this_cpu_ptr(&rcu_sched_data),
- true);
- }
- local_irq_restore(flags);
- }
+ if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s))
+ return;
+ trace_rcu_grace_period(TPS("rcu_sched"),
+ __this_cpu_read(rcu_sched_data.gpnum),
+ TPS("cpuqs"));
+ __this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false);
+ if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
+ return;
+ __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false);
+ rcu_report_exp_rdp(&rcu_sched_state,
+ this_cpu_ptr(&rcu_sched_data), true);
}
void rcu_bh_qs(void)
@@ -300,17 +289,16 @@ EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr);
* We inform the RCU core by emulating a zero-duration dyntick-idle
* period, which we in turn do by incrementing the ->dynticks counter
* by two.
+ *
+ * The caller must have disabled interrupts.
*/
static void rcu_momentary_dyntick_idle(void)
{
- unsigned long flags;
struct rcu_data *rdp;
struct rcu_dynticks *rdtp;
int resched_mask;
struct rcu_state *rsp;
- local_irq_save(flags);
-
/*
* Yes, we can lose flag-setting operations. This is OK, because
* the flag will be set again after some delay.
@@ -340,13 +328,12 @@ static void rcu_momentary_dyntick_idle(void)
smp_mb__after_atomic(); /* Later stuff after QS. */
break;
}
- local_irq_restore(flags);
}
/*
* Note a context switch. This is a quiescent state for RCU-sched,
* and requires special handling for preemptible RCU.
- * The caller must have disabled preemption.
+ * The caller must have disabled interrupts.
*/
void rcu_note_context_switch(void)
{
@@ -376,9 +363,14 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
*/
void rcu_all_qs(void)
{
+ unsigned long flags;
+
barrier(); /* Avoid RCU read-side critical sections leaking down. */
- if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
+ if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) {
+ local_irq_save(flags);
rcu_momentary_dyntick_idle();
+ local_irq_restore(flags);
+ }
this_cpu_inc(rcu_qs_ctr);
barrier(); /* Avoid RCU read-side critical sections leaking up. */
}
@@ -605,25 +597,25 @@ static int rcu_future_needs_gp(struct rcu_state *rsp)
* The caller must have disabled interrupts to prevent races with
* normal callback registry.
*/
-static int
+static bool
cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
{
int i;
if (rcu_gp_in_progress(rsp))
- return 0; /* No, a grace period is already in progress. */
+ return false; /* No, a grace period is already in progress. */
if (rcu_future_needs_gp(rsp))
- return 1; /* Yes, a no-CBs CPU needs one. */
+ return true; /* Yes, a no-CBs CPU needs one. */
if (!rdp->nxttail[RCU_NEXT_TAIL])
- return 0; /* No, this is a no-CBs (or offline) CPU. */
+ return false; /* No, this is a no-CBs (or offline) CPU. */
if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
- return 1; /* Yes, this CPU has newly registered callbacks. */
+ return true; /* Yes, CPU has newly registered callbacks. */
for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
ULONG_CMP_LT(READ_ONCE(rsp->completed),
rdp->nxtcompleted[i]))
- return 1; /* Yes, CBs for future grace period. */
- return 0; /* No grace period needed. */
+ return true; /* Yes, CBs for future grace period. */
+ return false; /* No grace period needed. */
}
/*
@@ -740,7 +732,7 @@ void rcu_user_enter(void)
*
* Exit from an interrupt handler, which might possibly result in entering
* idle mode, in other words, leaving the mode in which read-side critical
- * sections can occur.
+ * sections can occur. The caller must have disabled interrupts.
*
* This code assumes that the idle loop never does anything that might
* result in unbalanced calls to irq_enter() and irq_exit(). If your
@@ -753,11 +745,10 @@ void rcu_user_enter(void)
*/
void rcu_irq_exit(void)
{
- unsigned long flags;
long long oldval;
struct rcu_dynticks *rdtp;
- local_irq_save(flags);
+ RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!");
rdtp = this_cpu_ptr(&rcu_dynticks);
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting--;
@@ -768,6 +759,17 @@ void rcu_irq_exit(void)
else
rcu_eqs_enter_common(oldval, true);
rcu_sysidle_enter(1);
+}
+
+/*
+ * Wrapper for rcu_irq_exit() where interrupts are enabled.
+ */
+void rcu_irq_exit_irqson(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ rcu_irq_exit();
local_irq_restore(flags);
}
@@ -865,7 +867,7 @@ void rcu_user_exit(void)
*
* Enter an interrupt handler, which might possibly result in exiting
* idle mode, in other words, entering the mode in which read-side critical
- * sections can occur.
+ * sections can occur. The caller must have disabled interrupts.
*
* Note that the Linux kernel is fully capable of entering an interrupt
* handler that it never exits, for example when doing upcalls to
@@ -881,11 +883,10 @@ void rcu_user_exit(void)
*/
void rcu_irq_enter(void)
{
- unsigned long flags;
struct rcu_dynticks *rdtp;
long long oldval;
- local_irq_save(flags);
+ RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!");
rdtp = this_cpu_ptr(&rcu_dynticks);
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting++;
@@ -896,6 +897,17 @@ void rcu_irq_enter(void)
else
rcu_eqs_exit_common(oldval, true);
rcu_sysidle_exit(1);
+}
+
+/*
+ * Wrapper for rcu_irq_enter() where interrupts are enabled.
+ */
+void rcu_irq_enter_irqson(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ rcu_irq_enter();
local_irq_restore(flags);
}
@@ -1187,6 +1199,16 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
}
/*
+ * Convert a ->gp_state value to a character string.
+ */
+static const char *gp_state_getname(short gs)
+{
+ if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names))
+ return "???";
+ return gp_state_names[gs];
+}
+
+/*
* Complain about starvation of grace-period kthread.
*/
static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
@@ -1196,12 +1218,16 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
j = jiffies;
gpa = READ_ONCE(rsp->gp_activity);
- if (j - gpa > 2 * HZ)
- pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x s%d ->state=%#lx\n",
+ if (j - gpa > 2 * HZ) {
+ pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx\n",
rsp->name, j - gpa,
rsp->gpnum, rsp->completed,
- rsp->gp_flags, rsp->gp_state,
- rsp->gp_kthread ? rsp->gp_kthread->state : 0);
+ rsp->gp_flags,
+ gp_state_getname(rsp->gp_state), rsp->gp_state,
+ rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
+ if (rsp->gp_kthread)
+ sched_show_task(rsp->gp_kthread);
+ }
}
/*
@@ -1214,7 +1240,7 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
struct rcu_node *rnp;
rcu_for_each_leaf_node(rsp, rnp) {
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->qsmask != 0) {
for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
if (rnp->qsmask & (1UL << cpu))
@@ -1237,7 +1263,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
/* Only let one CPU complain about others per time interval. */
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
delta = jiffies - READ_ONCE(rsp->jiffies_stall);
if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1256,7 +1282,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
rsp->name);
print_cpu_stall_info_begin();
rcu_for_each_leaf_node(rsp, rnp) {
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
ndetected += rcu_print_task_stall(rnp);
if (rnp->qsmask != 0) {
for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
@@ -1327,7 +1353,7 @@ static void print_cpu_stall(struct rcu_state *rsp)
rcu_dump_cpu_stacks(rsp);
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall)))
WRITE_ONCE(rsp->jiffies_stall,
jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
@@ -1534,10 +1560,8 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
* hold it, acquire the root rcu_node structure's lock in order to
* start one (if needed).
*/
- if (rnp != rnp_root) {
- raw_spin_lock(&rnp_root->lock);
- smp_mb__after_unlock_lock();
- }
+ if (rnp != rnp_root)
+ raw_spin_lock_rcu_node(rnp_root);
/*
* Get a new grace-period number. If there really is no grace
@@ -1590,7 +1614,6 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
int needmore;
struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
- rcu_nocb_gp_cleanup(rsp, rnp);
rnp->need_future_gp[c & 0x1] = 0;
needmore = rnp->need_future_gp[(c + 1) & 0x1];
trace_rcu_future_gp(rnp, rdp, c,
@@ -1611,7 +1634,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
!READ_ONCE(rsp->gp_flags) ||
!rsp->gp_kthread)
return;
- wake_up(&rsp->gp_wq);
+ swake_up(&rsp->gp_wq);
}
/*
@@ -1786,11 +1809,10 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
if ((rdp->gpnum == READ_ONCE(rnp->gpnum) &&
rdp->completed == READ_ONCE(rnp->completed) &&
!unlikely(READ_ONCE(rdp->gpwrap))) || /* w/out lock. */
- !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
+ !raw_spin_trylock_rcu_node(rnp)) { /* irqs already off, so later. */
local_irq_restore(flags);
return;
}
- smp_mb__after_unlock_lock();
needwake = __note_gp_changes(rsp, rnp, rdp);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
if (needwake)
@@ -1805,21 +1827,20 @@ static void rcu_gp_slow(struct rcu_state *rsp, int delay)
}
/*
- * Initialize a new grace period. Return 0 if no grace period required.
+ * Initialize a new grace period. Return false if no grace period required.
*/
-static int rcu_gp_init(struct rcu_state *rsp)
+static bool rcu_gp_init(struct rcu_state *rsp)
{
unsigned long oldmask;
struct rcu_data *rdp;
struct rcu_node *rnp = rcu_get_root(rsp);
WRITE_ONCE(rsp->gp_activity, jiffies);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
if (!READ_ONCE(rsp->gp_flags)) {
/* Spurious wakeup, tell caller to go back to sleep. */
raw_spin_unlock_irq(&rnp->lock);
- return 0;
+ return false;
}
WRITE_ONCE(rsp->gp_flags, 0); /* Clear all flags: New grace period. */
@@ -1829,7 +1850,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
* Not supposed to be able to happen.
*/
raw_spin_unlock_irq(&rnp->lock);
- return 0;
+ return false;
}
/* Advance to a new grace period and initialize state. */
@@ -1847,8 +1868,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
*/
rcu_for_each_leaf_node(rsp, rnp) {
rcu_gp_slow(rsp, gp_preinit_delay);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
!rnp->wait_blkd_tasks) {
/* Nothing to do on this leaf rcu_node structure. */
@@ -1904,8 +1924,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
*/
rcu_for_each_node_breadth_first(rsp, rnp) {
rcu_gp_slow(rsp, gp_init_delay);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
rdp = this_cpu_ptr(rsp->rda);
rcu_preempt_check_blocked_tasks(rnp);
rnp->qsmask = rnp->qsmaskinit;
@@ -1923,7 +1942,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
WRITE_ONCE(rsp->gp_activity, jiffies);
}
- return 1;
+ return true;
}
/*
@@ -1973,8 +1992,7 @@ static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time)
}
/* Clear flag to prevent immediate re-entry. */
if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
WRITE_ONCE(rsp->gp_flags,
READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS);
raw_spin_unlock_irq(&rnp->lock);
@@ -1991,10 +2009,10 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
int nocb = 0;
struct rcu_data *rdp;
struct rcu_node *rnp = rcu_get_root(rsp);
+ struct swait_queue_head *sq;
WRITE_ONCE(rsp->gp_activity, jiffies);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
gp_duration = jiffies - rsp->gp_start;
if (gp_duration > rsp->gp_max)
rsp->gp_max = gp_duration;
@@ -2019,8 +2037,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
* grace period is recorded in any of the rcu_node structures.
*/
rcu_for_each_node_breadth_first(rsp, rnp) {
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
WARN_ON_ONCE(rnp->qsmask);
WRITE_ONCE(rnp->completed, rsp->gpnum);
@@ -2029,14 +2046,15 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
/* smp_mb() provided by prior unlock-lock pair. */
nocb += rcu_future_gp_cleanup(rsp, rnp);
+ sq = rcu_nocb_gp_get(rnp);
raw_spin_unlock_irq(&rnp->lock);
+ rcu_nocb_gp_cleanup(sq);
cond_resched_rcu_qs();
WRITE_ONCE(rsp->gp_activity, jiffies);
rcu_gp_slow(rsp, gp_cleanup_delay);
}
rnp = rcu_get_root(rsp);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock(); /* Order GP before ->completed update. */
+ raw_spin_lock_irq_rcu_node(rnp); /* Order GP before ->completed update. */
rcu_nocb_gp_set(rnp, nocb);
/* Declare grace period done. */
@@ -2076,7 +2094,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
READ_ONCE(rsp->gpnum),
TPS("reqwait"));
rsp->gp_state = RCU_GP_WAIT_GPS;
- wait_event_interruptible(rsp->gp_wq,
+ swait_event_interruptible(rsp->gp_wq,
READ_ONCE(rsp->gp_flags) &
RCU_GP_FLAG_INIT);
rsp->gp_state = RCU_GP_DONE_GPS;
@@ -2106,7 +2124,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
READ_ONCE(rsp->gpnum),
TPS("fqswait"));
rsp->gp_state = RCU_GP_WAIT_FQS;
- ret = wait_event_interruptible_timeout(rsp->gp_wq,
+ ret = swait_event_interruptible_timeout(rsp->gp_wq,
rcu_gp_fqs_check_wake(rsp, &gf), j);
rsp->gp_state = RCU_GP_DOING_FQS;
/* Locking provides needed memory barriers. */
@@ -2230,7 +2248,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
- rcu_gp_kthread_wake(rsp);
+ swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
}
/*
@@ -2284,8 +2302,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
raw_spin_unlock_irqrestore(&rnp->lock, flags);
rnp_c = rnp;
rnp = rnp->parent;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
oldmask = rnp_c->qsmask;
}
@@ -2332,8 +2349,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
gps = rnp->gpnum;
mask = rnp->grpmask;
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
- raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp_p); /* irqs already disabled. */
rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags);
}
@@ -2355,8 +2371,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
struct rcu_node *rnp;
rnp = rdp->mynode;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if ((rdp->cpu_no_qs.b.norm &&
rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) ||
rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum ||
@@ -2582,8 +2597,7 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
rnp = rnp->parent;
if (!rnp)
break;
- raw_spin_lock(&rnp->lock); /* irqs already disabled. */
- smp_mb__after_unlock_lock(); /* GP memory ordering. */
+ raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
rnp->qsmaskinit &= ~mask;
rnp->qsmask &= ~mask;
if (rnp->qsmaskinit) {
@@ -2611,8 +2625,7 @@ static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
mask = rdp->grpmask;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock(); /* Enforce GP memory-order guarantee. */
+ raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
rnp->qsmaskinitnext &= ~mask;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
@@ -2809,8 +2822,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
rcu_for_each_leaf_node(rsp, rnp) {
cond_resched_rcu_qs();
mask = 0;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->qsmask == 0) {
if (rcu_state_p == &rcu_sched_state ||
rsp != rcu_state_p ||
@@ -2881,8 +2893,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
/* rnp_old == rcu_get_root(rsp), rnp == NULL. */
/* Reached the root of the rcu_node tree, acquire lock. */
- raw_spin_lock_irqsave(&rnp_old->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp_old, flags);
raw_spin_unlock(&rnp_old->fqslock);
if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
rsp->n_force_qs_lh++;
@@ -2891,7 +2902,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
}
WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
- rcu_gp_kthread_wake(rsp);
+ swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
}
/*
@@ -2914,7 +2925,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
/* Does this CPU require a not-yet-started grace period? */
local_irq_save(flags);
if (cpu_needs_another_gp(rsp, rdp)) {
- raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
+ raw_spin_lock_rcu_node(rcu_get_root(rsp)); /* irqs disabled. */
needwake = rcu_start_gp(rsp);
raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
if (needwake)
@@ -3005,8 +3016,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
if (!rcu_gp_in_progress(rsp)) {
struct rcu_node *rnp_root = rcu_get_root(rsp);
- raw_spin_lock(&rnp_root->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp_root);
needwake = rcu_start_gp(rsp);
raw_spin_unlock(&rnp_root->lock);
if (needwake)
@@ -3365,7 +3375,6 @@ static unsigned long rcu_seq_snap(unsigned long *sp)
{
unsigned long s;
- smp_mb(); /* Caller's modifications seen first by other CPUs. */
s = (READ_ONCE(*sp) + 3) & ~0x1;
smp_mb(); /* Above access must not bleed into critical section. */
return s;
@@ -3392,6 +3401,7 @@ static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
}
static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
{
+ smp_mb(); /* Caller's modifications seen first by other CPUs. */
return rcu_seq_snap(&rsp->expedited_sequence);
}
static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
@@ -3426,8 +3436,7 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
* CPUs for the current rcu_node structure up the rcu_node tree.
*/
rcu_for_each_leaf_node(rsp, rnp) {
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->expmaskinit == rnp->expmaskinitnext) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
continue; /* No new CPUs, nothing to do. */
@@ -3447,8 +3456,7 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
rnp_up = rnp->parent;
done = false;
while (rnp_up) {
- raw_spin_lock_irqsave(&rnp_up->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp_up, flags);
if (rnp_up->expmaskinit)
done = true;
rnp_up->expmaskinit |= mask;
@@ -3472,8 +3480,7 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
sync_exp_reset_tree_hotplug(rsp);
rcu_for_each_node_breadth_first(rsp, rnp) {
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
WARN_ON_ONCE(rnp->expmask);
rnp->expmask = rnp->expmaskinit;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -3524,15 +3531,14 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
raw_spin_unlock_irqrestore(&rnp->lock, flags);
if (wake) {
smp_mb(); /* EGP done before wake_up(). */
- wake_up(&rsp->expedited_wq);
+ swake_up(&rsp->expedited_wq);
}
break;
}
mask = rnp->grpmask;
raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
rnp = rnp->parent;
- raw_spin_lock(&rnp->lock); /* irqs already disabled */
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
WARN_ON_ONCE(!(rnp->expmask & mask));
rnp->expmask &= ~mask;
}
@@ -3549,8 +3555,7 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
{
unsigned long flags;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
__rcu_report_exp_rnp(rsp, rnp, wake, flags);
}
@@ -3564,8 +3569,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
{
unsigned long flags;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (!(rnp->expmask & mask)) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
@@ -3609,7 +3613,7 @@ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
*/
static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
{
- struct rcu_data *rdp;
+ struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
struct rcu_node *rnp0;
struct rcu_node *rnp1 = NULL;
@@ -3623,7 +3627,7 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
if (sync_exp_work_done(rsp, rnp0, NULL,
- &rsp->expedited_workdone0, s))
+ &rdp->expedited_workdone0, s))
return NULL;
return rnp0;
}
@@ -3637,14 +3641,13 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
* can be inexact, as it is just promoting locality and is not
* strictly needed for correctness.
*/
- rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
- if (sync_exp_work_done(rsp, NULL, NULL, &rsp->expedited_workdone1, s))
+ if (sync_exp_work_done(rsp, NULL, NULL, &rdp->expedited_workdone1, s))
return NULL;
mutex_lock(&rdp->exp_funnel_mutex);
rnp0 = rdp->mynode;
for (; rnp0 != NULL; rnp0 = rnp0->parent) {
if (sync_exp_work_done(rsp, rnp1, rdp,
- &rsp->expedited_workdone2, s))
+ &rdp->expedited_workdone2, s))
return NULL;
mutex_lock(&rnp0->exp_funnel_mutex);
if (rnp1)
@@ -3654,7 +3657,7 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
rnp1 = rnp0;
}
if (sync_exp_work_done(rsp, rnp1, rdp,
- &rsp->expedited_workdone3, s))
+ &rdp->expedited_workdone3, s))
return NULL;
return rnp1;
}
@@ -3708,8 +3711,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
sync_exp_reset_tree(rsp);
rcu_for_each_leaf_node(rsp, rnp) {
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
/* Each pass checks a CPU for identity, offline, and idle. */
mask_ofl_test = 0;
@@ -3741,24 +3743,22 @@ retry_ipi:
ret = smp_call_function_single(cpu, func, rsp, 0);
if (!ret) {
mask_ofl_ipi &= ~mask;
- } else {
- /* Failed, raced with offline. */
- raw_spin_lock_irqsave(&rnp->lock, flags);
- if (cpu_online(cpu) &&
- (rnp->expmask & mask)) {
- raw_spin_unlock_irqrestore(&rnp->lock,
- flags);
- schedule_timeout_uninterruptible(1);
- if (cpu_online(cpu) &&
- (rnp->expmask & mask))
- goto retry_ipi;
- raw_spin_lock_irqsave(&rnp->lock,
- flags);
- }
- if (!(rnp->expmask & mask))
- mask_ofl_ipi &= ~mask;
+ continue;
+ }
+ /* Failed, raced with offline. */
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ if (cpu_online(cpu) &&
+ (rnp->expmask & mask)) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ schedule_timeout_uninterruptible(1);
+ if (cpu_online(cpu) &&
+ (rnp->expmask & mask))
+ goto retry_ipi;
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
}
+ if (!(rnp->expmask & mask))
+ mask_ofl_ipi &= ~mask;
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
/* Report quiescent states for those that went offline. */
mask_ofl_test |= mask_ofl_ipi;
@@ -3773,6 +3773,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
unsigned long jiffies_stall;
unsigned long jiffies_start;
unsigned long mask;
+ int ndetected;
struct rcu_node *rnp;
struct rcu_node *rnp_root = rcu_get_root(rsp);
int ret;
@@ -3781,28 +3782,30 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
jiffies_start = jiffies;
for (;;) {
- ret = wait_event_interruptible_timeout(
+ ret = swait_event_timeout(
rsp->expedited_wq,
sync_rcu_preempt_exp_done(rnp_root),
jiffies_stall);
- if (ret > 0)
+ if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root))
return;
if (ret < 0) {
/* Hit a signal, disable CPU stall warnings. */
- wait_event(rsp->expedited_wq,
+ swait_event(rsp->expedited_wq,
sync_rcu_preempt_exp_done(rnp_root));
return;
}
pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
rsp->name);
+ ndetected = 0;
rcu_for_each_leaf_node(rsp, rnp) {
- (void)rcu_print_task_exp_stall(rnp);
+ ndetected = rcu_print_task_exp_stall(rnp);
mask = 1;
for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
struct rcu_data *rdp;
if (!(rnp->expmask & mask))
continue;
+ ndetected++;
rdp = per_cpu_ptr(rsp->rda, cpu);
pr_cont(" %d-%c%c%c", cpu,
"O."[cpu_online(cpu)],
@@ -3811,8 +3814,23 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
}
mask <<= 1;
}
- pr_cont(" } %lu jiffies s: %lu\n",
- jiffies - jiffies_start, rsp->expedited_sequence);
+ pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
+ jiffies - jiffies_start, rsp->expedited_sequence,
+ rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]);
+ if (!ndetected) {
+ pr_err("blocking rcu_node structures:");
+ rcu_for_each_node_breadth_first(rsp, rnp) {
+ if (rnp == rnp_root)
+ continue; /* printed unconditionally */
+ if (sync_rcu_preempt_exp_done(rnp))
+ continue;
+ pr_cont(" l=%u:%d-%d:%#lx/%c",
+ rnp->level, rnp->grplo, rnp->grphi,
+ rnp->expmask,
+ ".T"[!!rnp->exp_tasks]);
+ }
+ pr_cont("\n");
+ }
rcu_for_each_leaf_node(rsp, rnp) {
mask = 1;
for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
@@ -3847,6 +3865,16 @@ void synchronize_sched_expedited(void)
struct rcu_node *rnp;
struct rcu_state *rsp = &rcu_sched_state;
+ /* If only one CPU, this is automatically a grace period. */
+ if (rcu_blocking_is_gp())
+ return;
+
+ /* If expedited grace periods are prohibited, fall back to normal. */
+ if (rcu_gp_is_normal()) {
+ wait_rcu_gp(call_rcu_sched);
+ return;
+ }
+
/* Take a snapshot of the sequence number. */
s = rcu_exp_gp_seq_snap(rsp);
@@ -4135,7 +4163,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
rnp = rnp->parent;
if (rnp == NULL)
return;
- raw_spin_lock(&rnp->lock); /* Interrupts already disabled. */
+ raw_spin_lock_rcu_node(rnp); /* Interrupts already disabled. */
rnp->qsmaskinit |= mask;
raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
}
@@ -4152,7 +4180,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
struct rcu_node *rnp = rcu_get_root(rsp);
/* Set up local state, ensuring consistent view of global state. */
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
@@ -4179,7 +4207,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
struct rcu_node *rnp = rcu_get_root(rsp);
/* Set up local state, ensuring consistent view of global state. */
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
rdp->qlen_last_fqs_check = 0;
rdp->n_force_qs_snap = rsp->n_force_qs;
rdp->blimit = blimit;
@@ -4198,8 +4226,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
*/
rnp = rdp->mynode;
mask = rdp->grpmask;
- raw_spin_lock(&rnp->lock); /* irqs already disabled. */
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
rnp->qsmaskinitnext |= mask;
rnp->expmaskinitnext |= mask;
if (!rdp->beenonline)
@@ -4327,14 +4354,14 @@ static int __init rcu_spawn_gp_kthread(void)
t = kthread_create(rcu_gp_kthread, rsp, "%s", rsp->name);
BUG_ON(IS_ERR(t));
rnp = rcu_get_root(rsp);
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
rsp->gp_kthread = t;
if (kthread_prio) {
sp.sched_priority = kthread_prio;
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
}
- wake_up_process(t);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ wake_up_process(t);
}
rcu_spawn_nocb_kthreads();
rcu_spawn_boost_kthreads();
@@ -4385,12 +4412,14 @@ static void __init rcu_init_levelspread(int *levelspread, const int *levelcnt)
/*
* Helper function for rcu_init() that initializes one rcu_state structure.
*/
-static void __init rcu_init_one(struct rcu_state *rsp,
- struct rcu_data __percpu *rda)
+static void __init rcu_init_one(struct rcu_state *rsp)
{
static const char * const buf[] = RCU_NODE_NAME_INIT;
static const char * const fqs[] = RCU_FQS_NAME_INIT;
static const char * const exp[] = RCU_EXP_NAME_INIT;
+ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
+ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
+ static struct lock_class_key rcu_exp_class[RCU_NUM_LVLS];
static u8 fl_mask = 0x1;
int levelcnt[RCU_NUM_LVLS]; /* # nodes in each level. */
@@ -4455,8 +4484,8 @@ static void __init rcu_init_one(struct rcu_state *rsp,
}
}
- init_waitqueue_head(&rsp->gp_wq);
- init_waitqueue_head(&rsp->expedited_wq);
+ init_swait_queue_head(&rsp->gp_wq);
+ init_swait_queue_head(&rsp->expedited_wq);
rnp = rsp->level[rcu_num_lvls - 1];
for_each_possible_cpu(i) {
while (i > rnp->grphi)
@@ -4576,8 +4605,8 @@ void __init rcu_init(void)
rcu_bootup_announce();
rcu_init_geometry();
- rcu_init_one(&rcu_bh_state, &rcu_bh_data);
- rcu_init_one(&rcu_sched_state, &rcu_sched_data);
+ rcu_init_one(&rcu_bh_state);
+ rcu_init_one(&rcu_sched_state);
if (dump_tree)
rcu_dump_rcu_node_tree(&rcu_sched_state);
__rcu_init_preempt();