From d6ada2cf2f81dab8a231d0ef8fb5dec4f5ac8379 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Wed, 24 Jun 2015 10:46:30 -0700
Subject: rcu: Rework synchronize_sched_expedited() counter handling

Now that synchronize_sched_expedited() has a mutex, it can use a simpler
work-already-done detection scheme.  This commit implements that scheme
using something similar to a sequence lock's counter: the counter is
incremented before and after each grace period, so that it is odd in the
midst of a grace period and even otherwise.  Thus, if the counter has
advanced to the second even number that is greater than or equal to the
snapshot, the required grace period has already happened.

Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tree_trace.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'kernel/rcu/tree_trace.c')

diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 3ea7ffc7d5c4..a1ab3a5f6290 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -185,18 +185,14 @@ static int show_rcuexp(struct seq_file *m, void *v)
 {
 	struct rcu_state *rsp = (struct rcu_state *)m->private;

-	seq_printf(m, "s=%lu d=%lu w=%lu tf=%lu wd1=%lu wd2=%lu n=%lu sc=%lu dt=%lu dl=%lu dx=%lu\n",
-		   atomic_long_read(&rsp->expedited_start),
-		   atomic_long_read(&rsp->expedited_done),
-		   atomic_long_read(&rsp->expedited_wrap),
+	seq_printf(m, "t=%lu tf=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu sc=%lu\n",
+		   rsp->expedited_sequence,
 		   atomic_long_read(&rsp->expedited_tryfail),
 		   atomic_long_read(&rsp->expedited_workdone1),
 		   atomic_long_read(&rsp->expedited_workdone2),
+		   rsp->expedited_workdone3,
 		   atomic_long_read(&rsp->expedited_normal),
-		   atomic_long_read(&rsp->expedited_stoppedcpus),
-		   atomic_long_read(&rsp->expedited_done_tries),
-		   atomic_long_read(&rsp->expedited_done_lost),
-		   atomic_long_read(&rsp->expedited_done_exit));
+		   rsp->expedited_sequence / 2);
 	return 0;
 }

--
cgit v1.2.3
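The even/odd counter scheme that this patch describes can be modeled in a
few lines of ordinary C.  The sketch below is illustrative only: the
exp_seq_*() names and the userspace asserts are this example's, not the
kernel's, and counter updates are assumed to be serialized by the
expedited mutex mentioned above, while readers merely snapshot and
compare.

#include <assert.h>
#include <stdbool.h>

/*
 * Userspace model of the even/odd sequence-counter scheme described in
 * the patch above.  Updates are assumed to be serialized by the caller
 * (the expedited mutex in the kernel).
 */
static unsigned long exp_sequence;

static void exp_seq_start(void)		/* GP begins: counter goes odd */
{
	exp_sequence++;
	assert(exp_sequence & 0x1);
}

static void exp_seq_end(void)		/* GP ends: counter goes even */
{
	exp_sequence++;
	assert(!(exp_sequence & 0x1));
}

/*
 * Snapshot as "the second even number greater than or equal to the
 * current counter value": once the counter reaches it, a full grace
 * period has elapsed since the snapshot was taken.
 */
static unsigned long exp_seq_snap(void)
{
	return (exp_sequence + 3) & ~0x1UL;
}

static bool exp_seq_done(unsigned long s)	/* wraparound-safe compare */
{
	return (long)(exp_sequence - s) >= 0;
}

int main(void)
{
	unsigned long s = exp_seq_snap();	/* caller needs one full GP */

	exp_seq_start();
	assert(!exp_seq_done(s));		/* GP still in progress */
	exp_seq_end();
	assert(exp_seq_done(s));		/* work already done for s */
	return 0;
}

Note that (x + 3) & ~0x1 is exactly the second even number greater than
or equal to x, which is how the work-already-done test reduces to a
single wraparound-safe comparison.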
From 385b73c06f6a733547d0a7714d0c4cb4c8788b88 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Wed, 24 Jun 2015 14:20:08 -0700
Subject: rcu: Get rid of synchronize_sched_expedited()'s polling loop

This commit gets rid of synchronize_sched_expedited()'s mutex_trylock()
polling loop in favor of a funnel-locking scheme based on the rcu_node
tree.  The work-done check is done at each level of the tree, allowing
high-contention situations to be resolved quickly with reasonable levels
of mutex contention.

Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tree_trace.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'kernel/rcu/tree_trace.c')

diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index a1ab3a5f6290..d2aab8dcd58e 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -185,12 +185,11 @@ static int show_rcuexp(struct seq_file *m, void *v)
 {
 	struct rcu_state *rsp = (struct rcu_state *)m->private;

-	seq_printf(m, "t=%lu tf=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu sc=%lu\n",
+	seq_printf(m, "t=%lu tf=%lu wd1=%lu wd2=%lu n=%lu sc=%lu\n",
 		   rsp->expedited_sequence,
 		   atomic_long_read(&rsp->expedited_tryfail),
 		   atomic_long_read(&rsp->expedited_workdone1),
 		   atomic_long_read(&rsp->expedited_workdone2),
-		   rsp->expedited_workdone3,
 		   atomic_long_read(&rsp->expedited_normal),
 		   rsp->expedited_sequence / 2);
 	return 0;
--
cgit v1.2.3

From 3a6d7c64d78a78d279851524d39999637a549363 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 25 Jun 2015 11:27:10 -0700
Subject: rcu: Make expedited GP CPU stoppage asynchronous

Sequentially stopping the CPUs slows down expedited grace periods by at
least a factor of two, based on rcutorture's grace-period-per-second
rate.  This is a conservative measure because rcutorture uses unusually
long RCU read-side critical sections and because rcutorture periodically
quiesces the system in order to test RCU's ability to ramp down to and
up from the idle state.  This commit therefore replaces stop_one_cpu()
with stop_one_cpu_nowait(), using an atomic-counter scheme to determine
when all CPUs have passed through the stopped state.

Signed-off-by: Peter Zijlstra
Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tree_trace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel/rcu/tree_trace.c')

diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index d2aab8dcd58e..36c04b46d3b8 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -185,12 +185,13 @@ static int show_rcuexp(struct seq_file *m, void *v)
 {
 	struct rcu_state *rsp = (struct rcu_state *)m->private;

-	seq_printf(m, "t=%lu tf=%lu wd1=%lu wd2=%lu n=%lu sc=%lu\n",
+	seq_printf(m, "t=%lu tf=%lu wd1=%lu wd2=%lu n=%lu enq=%d sc=%lu\n",
 		   rsp->expedited_sequence,
 		   atomic_long_read(&rsp->expedited_tryfail),
 		   atomic_long_read(&rsp->expedited_workdone1),
 		   atomic_long_read(&rsp->expedited_workdone2),
 		   atomic_long_read(&rsp->expedited_normal),
+		   atomic_read(&rsp->expedited_need_qs),
 		   rsp->expedited_sequence / 2);
 	return 0;
 }
--
cgit v1.2.3
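The atomic-counter completion scheme in the "Make expedited GP CPU
stoppage asynchronous" patch above can be modeled the same way.  In the
sketch below, one thread stands in for each stopped CPU and the need_qs
counter drains to zero as they finish; the thread-per-CPU model, the
names, and the busy-wait are illustrative stand-ins, since the kernel
dispatches real stopper work with stop_one_cpu_nowait() and sleeps
rather than spinning.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_CPUS 4

/*
 * Userspace model of the counting pattern only, not of the kernel's
 * stop-machine machinery: work is dispatched to each "CPU" without
 * waiting, and the initiator then waits for the counter to drain.
 */
static atomic_int need_qs;	/* CPUs yet to pass through the stopped state */

static void *cpu_stopper(void *arg)
{
	(void)arg;
	/* ... per-CPU quiescent-state work would go here ... */
	atomic_fetch_sub(&need_qs, 1);	/* report this CPU as done */
	return NULL;
}

int main(void)
{
	pthread_t tids[NR_CPUS];

	/* Count the CPUs first, then dispatch without waiting on each. */
	atomic_store(&need_qs, NR_CPUS);
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		pthread_create(&tids[cpu], NULL, cpu_stopper, NULL);

	/* Wait for every CPU to have passed through the stopped state. */
	while (atomic_load(&need_qs) > 0)
		;	/* the kernel sleeps here instead of spinning */

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		pthread_join(tids[cpu], NULL);
	printf("all %d CPUs reported a quiescent state\n", NR_CPUS);
	return 0;
}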
From 4f525a528b9e75571c6bedc6202beff1ced24c32 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Fri, 26 Jun 2015 11:20:00 -0700
Subject: rcu: Apply rcu_seq operations to _rcu_barrier()

The rcu_seq operations were open-coded in _rcu_barrier(), so this commit
replaces the open-coding with the shiny new rcu_seq operations.

Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tree_trace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/rcu/tree_trace.c')

diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 36c04b46d3b8..d9982a2ce305 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -81,9 +81,9 @@ static void r_stop(struct seq_file *m, void *v)
 static int show_rcubarrier(struct seq_file *m, void *v)
 {
 	struct rcu_state *rsp = (struct rcu_state *)m->private;
-	seq_printf(m, "bcc: %d nbd: %lu\n",
+	seq_printf(m, "bcc: %d bseq: %lu\n",
 		   atomic_read(&rsp->barrier_cpu_count),
-		   rsp->n_barrier_done);
+		   rsp->barrier_sequence);
 	return 0;
 }

--
cgit v1.2.3

From 2cd6ffafec066118365f6d7eb7a42ea16c1f032c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Mon, 29 Jun 2015 17:06:39 -0700
Subject: rcu: Extend expedited funnel locking to rcu_data structure

The strictly rcu_node based funnel-locking scheme works well in many
cases, but systems with CONFIG_RCU_FANOUT_LEAF=64 won't necessarily get
all that much concurrency.  This commit therefore extends the funnel
locking into the per-CPU rcu_data structure, providing concurrency equal
to the number of CPUs.

Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tree_trace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel/rcu/tree_trace.c')

diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index d9982a2ce305..ec62369f1b02 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -185,11 +185,12 @@ static int show_rcuexp(struct seq_file *m, void *v)
 {
 	struct rcu_state *rsp = (struct rcu_state *)m->private;

-	seq_printf(m, "t=%lu tf=%lu wd1=%lu wd2=%lu n=%lu enq=%d sc=%lu\n",
+	seq_printf(m, "t=%lu tf=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
 		   rsp->expedited_sequence,
 		   atomic_long_read(&rsp->expedited_tryfail),
 		   atomic_long_read(&rsp->expedited_workdone1),
 		   atomic_long_read(&rsp->expedited_workdone2),
+		   atomic_long_read(&rsp->expedited_workdone3),
 		   atomic_long_read(&rsp->expedited_normal),
 		   atomic_read(&rsp->expedited_need_qs),
 		   rsp->expedited_sequence / 2);
--
cgit v1.2.3

From cdacbe1f91264687af956e810278030f2ab5a3d0 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Sat, 11 Jul 2015 16:24:45 -0700
Subject: rcu: Add fastpath bypassing funnel locking

In the common case, there will be only one expedited grace period in the
system at a given time, in which case it is not helpful to use funnel
locking.  This commit therefore adds a fastpath that bypasses funnel
locking when the root ->exp_funnel_mutex is not held.

Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tree_trace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/rcu/tree_trace.c')

diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index ec62369f1b02..6fc4c5ff3bb5 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -185,9 +185,9 @@ static int show_rcuexp(struct seq_file *m, void *v)
 {
 	struct rcu_state *rsp = (struct rcu_state *)m->private;

-	seq_printf(m, "t=%lu tf=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
+	seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
 		   rsp->expedited_sequence,
-		   atomic_long_read(&rsp->expedited_tryfail),
+		   atomic_long_read(&rsp->expedited_workdone0),
 		   atomic_long_read(&rsp->expedited_workdone1),
 		   atomic_long_read(&rsp->expedited_workdone2),
 		   atomic_long_read(&rsp->expedited_workdone3),
--
cgit v1.2.3
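Taken together, the funnel-locking patches above (the rcu_node walk, its
extension to the per-CPU rcu_data structure, and the trylock fastpath)
have roughly the following shape.  This is a userspace sketch with
pthread mutexes over an assumed two-field node type; the names and the
work_done() helper are this example's, not the kernel's.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

/*
 * Userspace model of funnel locking with the trylock fastpath.  The
 * kernel walks its rcu_node tree (starting one level lower, at the
 * per-CPU rcu_data structure, after the patch above) and uses the
 * expedited sequence counter for the work-done test.
 */
struct node {
	pthread_mutex_t funnel_mutex;
	struct node *parent;			/* NULL at the root */
};

static unsigned long exp_sequence;		/* as in the earlier sketch */

/* Has the grace period needed by snapshot @s already completed? */
static bool work_done(unsigned long s)
{
	return (long)(exp_sequence - s) >= 0;
}

/*
 * Return with the root's funnel mutex held if the caller must drive a
 * new expedited grace period, or NULL if some other caller's grace
 * period already covered snapshot @s.
 */
struct node *funnel_lock(struct node *leaf, struct node *root,
			 unsigned long s)
{
	struct node *np;

	/* Fastpath: no contention, so go straight for the root mutex. */
	if (pthread_mutex_trylock(&root->funnel_mutex) == 0) {
		if (!work_done(s))
			return root;
		pthread_mutex_unlock(&root->funnel_mutex);
		return NULL;
	}

	/* Slowpath: funnel up the tree, holding one mutex at a time. */
	pthread_mutex_lock(&leaf->funnel_mutex);
	for (np = leaf; np->parent; np = np->parent) {
		if (work_done(s)) {		/* someone else did the work */
			pthread_mutex_unlock(&np->funnel_mutex);
			return NULL;
		}
		pthread_mutex_lock(&np->parent->funnel_mutex);
		pthread_mutex_unlock(&np->funnel_mutex);
	}
	if (work_done(s)) {
		pthread_mutex_unlock(&np->funnel_mutex);
		return NULL;
	}
	return np;				/* np == root, mutex held */
}

A caller that gets NULL back simply returns, because a grace period
covering its snapshot has already completed; a caller that gets the root
back drives the new expedited grace period and then releases the root
mutex.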