summary refs log tree commit diff
path: root/kernel/rcu
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/rcu')
-rw-r--r-- kernel/rcu/rcuperf.c 7
-rw-r--r-- kernel/rcu/rcutorture.c 62
-rw-r--r-- kernel/rcu/tree.c 32
-rw-r--r-- kernel/rcu/tree.h 1
-rw-r--r-- kernel/rcu/tree_exp.h 124
-rw-r--r-- kernel/rcu/tree_trace.c 7
6 files changed, 135 insertions, 98 deletions
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index d38ab08a3fe7..123ccbd22449 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -52,7 +52,7 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
#define PERF_FLAG "-perf:"
#define PERFOUT_STRING(s) \
- pr_alert("%s" PERF_FLAG s "\n", perf_type)
+ pr_alert("%s" PERF_FLAG " %s\n", perf_type, s)
#define VERBOSE_PERFOUT_STRING(s) \
do { if (verbose) pr_alert("%s" PERF_FLAG " %s\n", perf_type, s); } while (0)
#define VERBOSE_PERFOUT_ERRSTRING(s) \
@@ -400,9 +400,8 @@ rcu_perf_writer(void *arg)
sp.sched_priority = 0;
sched_setscheduler_nocheck(current,
SCHED_NORMAL, &sp);
- pr_alert("%s" PERF_FLAG
- "rcu_perf_writer %ld has %d measurements\n",
- perf_type, me, MIN_MEAS);
+ pr_alert("%s%s rcu_perf_writer %ld has %d measurements\n",
+ perf_type, PERF_FLAG, me, MIN_MEAS);
if (atomic_inc_return(&n_rcu_perf_writer_finished) >=
nrealwriters) {
schedule_timeout_interruptible(10);
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 971e2b138063..bf08fee53dc7 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1238,6 +1238,7 @@ rcu_torture_stats_print(void)
long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
static unsigned long rtcv_snap = ULONG_MAX;
+ struct task_struct *wtp;
for_each_possible_cpu(cpu) {
for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
@@ -1258,8 +1259,9 @@ rcu_torture_stats_print(void)
atomic_read(&n_rcu_torture_alloc),
atomic_read(&n_rcu_torture_alloc_fail),
atomic_read(&n_rcu_torture_free));
- pr_cont("rtmbe: %d rtbke: %ld rtbre: %ld ",
+ pr_cont("rtmbe: %d rtbe: %ld rtbke: %ld rtbre: %ld ",
atomic_read(&n_rcu_torture_mberror),
+ n_rcu_torture_barrier_error,
n_rcu_torture_boost_ktrerror,
n_rcu_torture_boost_rterror);
pr_cont("rtbf: %ld rtb: %ld nt: %ld ",
@@ -1312,10 +1314,12 @@ rcu_torture_stats_print(void)
rcutorture_get_gp_data(cur_ops->ttype,
&flags, &gpnum, &completed);
- pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x\n",
+ wtp = READ_ONCE(writer_task);
+ pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x ->state %#lx\n",
rcu_torture_writer_state_getname(),
rcu_torture_writer_state,
- gpnum, completed, flags);
+ gpnum, completed, flags,
+ wtp == NULL ? ~0UL : wtp->state);
show_rcu_gp_kthreads();
rcu_ftrace_dump(DUMP_ALL);
}
@@ -1362,12 +1366,12 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
onoff_interval, onoff_holdoff);
}
-static void rcutorture_booster_cleanup(int cpu)
+static int rcutorture_booster_cleanup(unsigned int cpu)
{
struct task_struct *t;
if (boost_tasks[cpu] == NULL)
- return;
+ return 0;
mutex_lock(&boost_mutex);
t = boost_tasks[cpu];
boost_tasks[cpu] = NULL;
@@ -1375,9 +1379,10 @@ static void rcutorture_booster_cleanup(int cpu)
/* This must be outside of the mutex, otherwise deadlock! */
torture_stop_kthread(rcu_torture_boost, t);
+ return 0;
}
-static int rcutorture_booster_init(int cpu)
+static int rcutorture_booster_init(unsigned int cpu)
{
int retval;
@@ -1577,28 +1582,7 @@ static void rcu_torture_barrier_cleanup(void)
}
}
-static int rcutorture_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- long cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- case CPU_DOWN_FAILED:
- (void)rcutorture_booster_init(cpu);
- break;
- case CPU_DOWN_PREPARE:
- rcutorture_booster_cleanup(cpu);
- break;
- default:
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block rcutorture_cpu_nb = {
- .notifier_call = rcutorture_cpu_notify,
-};
+static enum cpuhp_state rcutor_hp;
static void
rcu_torture_cleanup(void)
@@ -1638,11 +1622,8 @@ rcu_torture_cleanup(void)
for (i = 0; i < ncbflooders; i++)
torture_stop_kthread(rcu_torture_cbflood, cbflood_task[i]);
if ((test_boost == 1 && cur_ops->can_boost) ||
- test_boost == 2) {
- unregister_cpu_notifier(&rcutorture_cpu_nb);
- for_each_possible_cpu(i)
- rcutorture_booster_cleanup(i);
- }
+ test_boost == 2)
+ cpuhp_remove_state(rcutor_hp);
/*
* Wait for all RCU callbacks to fire, then do flavor-specific
@@ -1869,14 +1850,13 @@ rcu_torture_init(void)
test_boost == 2) {
boost_starttime = jiffies + test_boost_interval * HZ;
- register_cpu_notifier(&rcutorture_cpu_nb);
- for_each_possible_cpu(i) {
- if (cpu_is_offline(i))
- continue; /* Heuristic: CPU can go offline. */
- firsterr = rcutorture_booster_init(i);
- if (firsterr)
- goto unwind;
- }
+
+ firsterr = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "RCU_TORTURE",
+ rcutorture_booster_init,
+ rcutorture_booster_cleanup);
+ if (firsterr < 0)
+ goto unwind;
+ rcutor_hp = firsterr;
}
firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup);
if (firsterr)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 733902c33dd2..7e2e03879c2e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3792,8 +3792,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
rnp = rdp->mynode;
mask = rdp->grpmask;
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
- rnp->qsmaskinitnext |= mask;
- rnp->expmaskinitnext |= mask;
if (!rdp->beenonline)
WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
rdp->beenonline = true; /* We have now been online. */
@@ -3860,6 +3858,32 @@ int rcutree_dead_cpu(unsigned int cpu)
return 0;
}
+/*
+ * Mark the specified CPU as being online so that subsequent grace periods
+ * (both expedited and normal) will wait on it. Note that this means that
+ * incoming CPUs are not allowed to use RCU read-side critical sections
+ * until this function is called. Failing to observe this restriction
+ * will result in lockdep splats.
+ *
+ * For every RCU flavor, the CPU's ->grpmask bit is ORed into both
+ * ->qsmaskinitnext and ->expmaskinitnext of the rcu_node structure that
+ * its rcu_data ->mynode pointer references, with that node's lock held
+ * and interrupts disabled.
+ *
+ * NOTE(review): the body locates the rcu_data structure with
+ * this_cpu_ptr() and never reads @cpu, so it updates the masks for the
+ * CPU that is executing this function. Confirm that every caller
+ * (including the for_each_online_cpu() loop in rcu_init()) runs it on
+ * the CPU named by @cpu.
+ */
+void rcu_cpu_starting(unsigned int cpu)
+{
+ unsigned long flags;
+ unsigned long mask;
+ struct rcu_data *rdp;
+ struct rcu_node *rnp;
+ struct rcu_state *rsp;
+
+ for_each_rcu_flavor(rsp) {
+ rdp = this_cpu_ptr(rsp->rda); /* Executing CPU's data; @cpu is not consulted. */
+ rnp = rdp->mynode;
+ mask = rdp->grpmask;
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ rnp->qsmaskinitnext |= mask; /* Setting moved here from rcu_init_percpu_data(). */
+ rnp->expmaskinitnext |= mask; /* Likewise for the expedited mask. */
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ }
+}
+
#ifdef CONFIG_HOTPLUG_CPU
/*
* The CPU is exiting the idle loop into the arch_cpu_idle_dead()
@@ -4209,8 +4233,10 @@ void __init rcu_init(void)
* or the scheduler are operational.
*/
pm_notifier(rcu_pm_notify, 0);
- for_each_online_cpu(cpu)
+ for_each_online_cpu(cpu) {
rcutree_prepare_cpu(cpu);
+ rcu_cpu_starting(cpu);
+ }
}
#include "tree_exp.h"
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index f714f873bf9d..e99a5234d9ed 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -400,6 +400,7 @@ struct rcu_data {
#ifdef CONFIG_RCU_FAST_NO_HZ
struct rcu_head oom_head;
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
+ atomic_long_t exp_workdone0; /* # done by workqueue. */
atomic_long_t exp_workdone1; /* # done by others #1. */
atomic_long_t exp_workdone2; /* # done by others #2. */
atomic_long_t exp_workdone3; /* # done by others #3. */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 6d86ab6ec2c9..24343eb87b58 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -359,7 +359,8 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
if (raw_smp_processor_id() == cpu ||
- !(atomic_add_return(0, &rdtp->dynticks) & 0x1))
+ !(atomic_add_return(0, &rdtp->dynticks) & 0x1) ||
+ !(rnp->qsmaskinitnext & rdp->grpmask))
mask_ofl_test |= rdp->grpmask;
}
mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
@@ -384,17 +385,16 @@ retry_ipi:
mask_ofl_ipi &= ~mask;
continue;
}
- /* Failed, raced with offline. */
+ /* Failed, raced with CPU hotplug operation. */
raw_spin_lock_irqsave_rcu_node(rnp, flags);
- if (cpu_online(cpu) &&
+ if ((rnp->qsmaskinitnext & mask) &&
(rnp->expmask & mask)) {
+ /* Online, so delay for a bit and try again. */
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
schedule_timeout_uninterruptible(1);
- if (cpu_online(cpu) &&
- (rnp->expmask & mask))
- goto retry_ipi;
- raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ goto retry_ipi;
}
+ /* CPU really is offline, so we can ignore it. */
if (!(rnp->expmask & mask))
mask_ofl_ipi &= ~mask;
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -427,12 +427,10 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
jiffies_stall);
if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root))
return;
- if (ret < 0) {
- /* Hit a signal, disable CPU stall warnings. */
- swait_event(rsp->expedited_wq,
- sync_rcu_preempt_exp_done(rnp_root));
- return;
- }
+ WARN_ON(ret < 0); /* workqueues should not be signaled. */
+ if (rcu_cpu_stall_suppress)
+ continue;
+ panic_on_rcu_stall();
pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
rsp->name);
ndetected = 0;
@@ -500,7 +498,6 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
* next GP, to proceed.
*/
mutex_lock(&rsp->exp_wake_mutex);
- mutex_unlock(&rsp->exp_mutex);
rcu_for_each_node_breadth_first(rsp, rnp) {
if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
@@ -516,6 +513,70 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
mutex_unlock(&rsp->exp_wake_mutex);
}
+/* Let the workqueue handler know what it is supposed to do. */
+struct rcu_exp_work {
+ smp_call_func_t rew_func; /* Handler passed to sync_rcu_exp_select_cpus(). */
+ struct rcu_state *rew_rsp; /* RCU flavor whose expedited GP is being driven. */
+ unsigned long rew_s; /* Sequence snapshot passed to rcu_exp_wait_wake(). */
+ struct work_struct rew_work; /* Work item; container_of() anchor in wait_rcu_exp_gp(). */
+};
+
+/*
+ * Work-queue handler to drive an expedited grace period forward.
+ *
+ * @wp is the ->rew_work member of a struct rcu_exp_work. The enclosing
+ * structure supplies the flavor, the handler function, and the sequence
+ * snapshot that are forwarded to sync_rcu_exp_select_cpus() and then to
+ * rcu_exp_wait_wake().
+ */
+static void wait_rcu_exp_gp(struct work_struct *wp)
+{
+ struct rcu_exp_work *rewp;
+
+ /* Initialize the rcu_node tree in preparation for the wait. */
+ rewp = container_of(wp, struct rcu_exp_work, rew_work); /* Recover the request from its work item. */
+ sync_rcu_exp_select_cpus(rewp->rew_rsp, rewp->rew_func);
+
+ /* Wait and clean up, including waking everyone. */
+ rcu_exp_wait_wake(rewp->rew_rsp, rewp->rew_s);
+}
+
+/*
+ * Given an rcu_state pointer and a smp_call_function() handler, kick
+ * off the specified flavor of expedited grace period.
+ *
+ * Sequence of operations:
+ *   1. If rcu_gp_is_normal() is true, wait for a normal grace period via
+ *      wait_rcu_gp(rsp->call) and return.
+ *   2. Snapshot the expedited sequence number and call exp_funnel_lock();
+ *      a nonzero return means no further work is needed here.
+ *   3. Package @func, @rsp and the snapshot into an on-stack
+ *      struct rcu_exp_work and hand it to schedule_work(), whose handler
+ *      is wait_rcu_exp_gp().
+ *   4. Sleep on one of the root rcu_node's ->exp_wq wait queues until
+ *      sync_exp_work_done() reports completion for the snapshot.
+ *   5. Release rsp->exp_mutex.
+ *
+ * NOTE(review): ->exp_mutex is released here but not acquired in this
+ * function; presumably exp_funnel_lock() returns with it held when it
+ * returns zero -- confirm against its definition.
+ *
+ * NOTE(review): the work item lives on this function's stack, so the
+ * function must not return while the workqueue still references it.
+ * No destroy_work_on_stack() call pairs with INIT_WORK_ONSTACK() in
+ * the visible code -- confirm whether one is required here.
+ */
+static void _synchronize_rcu_expedited(struct rcu_state *rsp,
+ smp_call_func_t func)
+{
+ struct rcu_data *rdp;
+ struct rcu_exp_work rew; /* On-stack request handed to the workqueue below. */
+ struct rcu_node *rnp;
+ unsigned long s;
+
+ /* If expedited grace periods are prohibited, fall back to normal. */
+ if (rcu_gp_is_normal()) {
+ wait_rcu_gp(rsp->call);
+ return;
+ }
+
+ /* Take a snapshot of the sequence number. */
+ s = rcu_exp_gp_seq_snap(rsp);
+ if (exp_funnel_lock(rsp, s))
+ return; /* Someone else did our work for us. */
+
+ /* Marshall arguments and schedule the expedited grace period. */
+ rew.rew_func = func;
+ rew.rew_rsp = rsp;
+ rew.rew_s = s;
+ INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
+ schedule_work(&rew.rew_work);
+
+ /* Wait for expedited grace period to complete. */
+ rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); /* Used only for its exp_workdone0 counter. */
+ rnp = rcu_get_root(rsp);
+ wait_event(rnp->exp_wq[(s >> 1) & 0x3], /* Queue chosen by bits 1-2 of the snapshot. */
+ sync_exp_work_done(rsp,
+ &rdp->exp_workdone0, s));
+
+ /* Let the next expedited grace period start. */
+ mutex_unlock(&rsp->exp_mutex);
+}
+
/**
* synchronize_sched_expedited - Brute-force RCU-sched grace period
*
@@ -534,29 +595,13 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
*/
void synchronize_sched_expedited(void)
{
- unsigned long s;
struct rcu_state *rsp = &rcu_sched_state;
/* If only one CPU, this is automatically a grace period. */
if (rcu_blocking_is_gp())
return;
- /* If expedited grace periods are prohibited, fall back to normal. */
- if (rcu_gp_is_normal()) {
- wait_rcu_gp(call_rcu_sched);
- return;
- }
-
- /* Take a snapshot of the sequence number. */
- s = rcu_exp_gp_seq_snap(rsp);
- if (exp_funnel_lock(rsp, s))
- return; /* Someone else did our work for us. */
-
- /* Initialize the rcu_node tree in preparation for the wait. */
- sync_rcu_exp_select_cpus(rsp, sync_sched_exp_handler);
-
- /* Wait and clean up, including waking everyone. */
- rcu_exp_wait_wake(rsp, s);
+ _synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
@@ -620,23 +665,8 @@ static void sync_rcu_exp_handler(void *info)
void synchronize_rcu_expedited(void)
{
struct rcu_state *rsp = rcu_state_p;
- unsigned long s;
-
- /* If expedited grace periods are prohibited, fall back to normal. */
- if (rcu_gp_is_normal()) {
- wait_rcu_gp(call_rcu);
- return;
- }
-
- s = rcu_exp_gp_seq_snap(rsp);
- if (exp_funnel_lock(rsp, s))
- return; /* Someone else did our work for us. */
-
- /* Initialize the rcu_node tree in preparation for the wait. */
- sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler);
- /* Wait for ->blkd_tasks lists to drain, then wake everyone up. */
- rcu_exp_wait_wake(rsp, s);
+ _synchronize_rcu_expedited(rsp, sync_rcu_exp_handler);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 86782f9a4604..b1f28972872c 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -185,16 +185,17 @@ static int show_rcuexp(struct seq_file *m, void *v)
int cpu;
struct rcu_state *rsp = (struct rcu_state *)m->private;
struct rcu_data *rdp;
- unsigned long s1 = 0, s2 = 0, s3 = 0;
+ unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
for_each_possible_cpu(cpu) {
rdp = per_cpu_ptr(rsp->rda, cpu);
+ s0 += atomic_long_read(&rdp->exp_workdone0);
s1 += atomic_long_read(&rdp->exp_workdone1);
s2 += atomic_long_read(&rdp->exp_workdone2);
s3 += atomic_long_read(&rdp->exp_workdone3);
}
- seq_printf(m, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
- rsp->expedited_sequence, s1, s2, s3,
+ seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
+ rsp->expedited_sequence, s0, s1, s2, s3,
atomic_long_read(&rsp->expedited_normal),
atomic_read(&rsp->expedited_need_qs),
rsp->expedited_sequence / 2);