From e66c33d579ea566d10e8c8695a7168aae3e02992 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 12 Jul 2013 16:50:28 -0400 Subject: rcu: Add const annotation to char * for RCU tracepoints and functions All the RCU tracepoints and functions that reference char pointers do so with just 'char *' even though they do not modify the contents of the string itself. This will cause warnings if a const char * is used in one of these functions. The RCU tracepoints store the pointer to the string to refer back to them when the trace output is displayed. As this can be minutes, hours or even days later, those strings had better be constant. This change also opens the door to allow the RCU tracepoint strings and their addresses to be exported so that userspace tracing tools can translate the contents of the pointers of the RCU tracepoints. Signed-off-by: Steven Rostedt --- kernel/rcutree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 068de3a93606..30201494560b 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1032,7 +1032,7 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp, * rcu_nocb_wait_gp(). */ static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, - unsigned long c, char *s) + unsigned long c, const char *s) { trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, c, rnp->level, @@ -2720,7 +2720,7 @@ static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy) * Helper function for _rcu_barrier() tracing. If tracing is disabled, * the compiler is expected to optimize this away. */ -static void _rcu_barrier_trace(struct rcu_state *rsp, char *s, +static void _rcu_barrier_trace(struct rcu_state *rsp, const char *s, int cpu, unsigned long done) { trace_rcu_barrier(rsp->name, s, cpu, -- cgit v1.2.3 From a41bfeb2f8ed59410be7ca0f8fbc6138a758b746 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 12 Jul 2013 17:00:28 -0400 Subject: rcu: Simplify RCU_STATE_INITIALIZER() macro The RCU_STATE_INITIALIZER() macro is used only in the rcutree.c file as well as the rcutree_plugin.h file. It is passed as a rvalue to a variable of a similar name. A per_cpu variable is also created with a similar name as well. The uses of RCU_STATE_INITIALIZER() can be simplified to remove some of the duplicate code that is done. Currently the three users of this macro has this format: struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched); DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); Notice that "rcu_sched" is called three times. This is the same with the other two users. This can be condensed to just: RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched); by moving the rest into the macro itself. This also opens the door to allow the RCU tracepoint strings and their addresses to be exported so that userspace tracing tools can translate the contents of the pointers of the RCU tracepoints. The change will allow for helper code to be placed in the RCU_STATE_INITIALIZER() macro to export the name that is used. Signed-off-by: Steven Rostedt --- kernel/rcutree.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 30201494560b..97994a329d80 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -64,7 +64,8 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; -#define RCU_STATE_INITIALIZER(sname, sabbr, cr) { \ +#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ +struct rcu_state sname##_state = { \ .level = { &sname##_state.node[0] }, \ .call = cr, \ .fqs_state = RCU_GP_IDLE, \ @@ -77,14 +78,11 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \ .name = #sname, \ .abbr = sabbr, \ -} - -struct rcu_state rcu_sched_state = - RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); -DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); +}; \ +DEFINE_PER_CPU(struct rcu_data, sname##_data) -struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); -DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); +RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); +RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); static struct rcu_state *rcu_state; LIST_HEAD(rcu_struct_flavors); -- cgit v1.2.3 From f7f7bac9cb1c50783f15937a11743655a5756a36 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 12 Jul 2013 17:18:47 -0400 Subject: rcu: Have the RCU tracepoints use the tracepoint_string infrastructure Currently, RCU tracepoints save only a pointer to strings in the ring buffer. When displayed via the /sys/kernel/debug/tracing/trace file they are referenced like the printf "%s" that looks at the address in the ring buffer and prints out the string it points too. This requires that the strings are constant and persistent in the kernel. The problem with this is for tools like trace-cmd and perf that read the binary data from the buffers but have no access to the kernel memory to find out what string is represented by the address in the buffer. By using the tracepoint_string infrastructure, the RCU tracepoint strings can be exported such that userspace tools can map the addresses to the strings. # cat /sys/kernel/debug/tracing/printk_formats 0xffffffff81a4a0e8 : "rcu_preempt" 0xffffffff81a4a0f4 : "rcu_bh" 0xffffffff81a4a100 : "rcu_sched" 0xffffffff818437a0 : "cpuqs" 0xffffffff818437a6 : "rcu_sched" 0xffffffff818437a0 : "cpuqs" 0xffffffff818437b0 : "rcu_bh" 0xffffffff818437b7 : "Start context switch" 0xffffffff818437cc : "End context switch" 0xffffffff818437a0 : "cpuqs" [...] Now userspaces tools can display: rcu_utilization: Start context switch rcu_dyntick: Start 1 0 rcu_utilization: End context switch rcu_batch_start: rcu_preempt CBs=0/5 bl=10 rcu_dyntick: End 0 140000000000000 rcu_invoke_callback: rcu_preempt rhp=0xffff880071c0d600 func=proc_i_callback rcu_invoke_callback: rcu_preempt rhp=0xffff880077b5b230 func=__d_free rcu_dyntick: Start 140000000000000 0 rcu_invoke_callback: rcu_preempt rhp=0xffff880077563980 func=file_free_rcu rcu_batch_end: rcu_preempt CBs-invoked=3 idle=>c<>c<>c<>c< rcu_utilization: End RCU core rcu_grace_period: rcu_preempt 9741 start rcu_dyntick: Start 1 0 rcu_dyntick: End 0 140000000000000 rcu_dyntick: Start 140000000000000 0 Instead of: rcu_utilization: ffffffff81843110 rcu_future_grace_period: ffffffff81842f1d 9939 9939 9940 0 0 3 ffffffff81842f32 rcu_batch_start: ffffffff81842f1d CBs=0/4 bl=10 rcu_future_grace_period: ffffffff81842f1d 9939 9939 9940 0 0 3 ffffffff81842f3c rcu_grace_period: ffffffff81842f1d 9939 ffffffff81842f80 rcu_invoke_callback: ffffffff81842f1d rhp=0xffff88007888aac0 func=file_free_rcu rcu_grace_period: ffffffff81842f1d 9939 ffffffff81842f95 rcu_invoke_callback: ffffffff81842f1d rhp=0xffff88006aeb4600 func=proc_i_callback rcu_future_grace_period: ffffffff81842f1d 9939 9939 9940 0 0 3 ffffffff81842f32 rcu_future_grace_period: ffffffff81842f1d 9939 9939 9940 0 0 3 ffffffff81842f3c rcu_invoke_callback: ffffffff81842f1d rhp=0xffff880071cb9fc0 func=__d_free rcu_grace_period: ffffffff81842f1d 9939 ffffffff81842f80 rcu_invoke_callback: ffffffff81842f1d rhp=0xffff88007888ae80 func=file_free_rcu rcu_batch_end: ffffffff81842f1d CBs-invoked=4 idle=>c<>c<>c<>c< rcu_utilization: ffffffff8184311f Signed-off-by: Steven Rostedt --- kernel/rcutree.c | 87 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 34 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 97994a329d80..338f1d1c1c66 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -53,18 +53,36 @@ #include #include #include +#include #include "rcutree.h" #include #include "rcu.h" +/* + * Strings used in tracepoints need to be exported via the + * tracing system such that tools like perf and trace-cmd can + * translate the string address pointers to actual text. + */ +#define TPS(x) tracepoint_string(x) + /* Data structures. */ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; +/* + * In order to export the rcu_state name to the tracing tools, it + * needs to be added in the __tracepoint_string section. + * This requires defining a separate variable tp__varname + * that points to the string being used, and this will allow + * the tracing userspace tools to be able to decipher the string + * address to the matching string. + */ #define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ +static char sname##_varname[] = #sname; \ +static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; \ struct rcu_state sname##_state = { \ .level = { &sname##_state.node[0] }, \ .call = cr, \ @@ -76,7 +94,7 @@ struct rcu_state sname##_state = { \ .orphan_donetail = &sname##_state.orphan_donelist, \ .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \ - .name = #sname, \ + .name = sname##_varname, \ .abbr = sabbr, \ }; \ DEFINE_PER_CPU(struct rcu_data, sname##_data) @@ -176,7 +194,7 @@ void rcu_sched_qs(int cpu) struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); if (rdp->passed_quiesce == 0) - trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs"); + trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs")); rdp->passed_quiesce = 1; } @@ -185,7 +203,7 @@ void rcu_bh_qs(int cpu) struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); if (rdp->passed_quiesce == 0) - trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs"); + trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs")); rdp->passed_quiesce = 1; } @@ -196,10 +214,10 @@ void rcu_bh_qs(int cpu) */ void rcu_note_context_switch(int cpu) { - trace_rcu_utilization("Start context switch"); + trace_rcu_utilization(TPS("Start context switch")); rcu_sched_qs(cpu); rcu_preempt_note_context_switch(cpu); - trace_rcu_utilization("End context switch"); + trace_rcu_utilization(TPS("End context switch")); } EXPORT_SYMBOL_GPL(rcu_note_context_switch); @@ -343,11 +361,11 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp) static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, bool user) { - trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting); + trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting); if (!user && !is_idle_task(current)) { struct task_struct *idle = idle_task(smp_processor_id()); - trace_rcu_dyntick("Error on entry: not idle task", oldval, 0); + trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0); ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, @@ -477,7 +495,7 @@ void rcu_irq_exit(void) rdtp->dynticks_nesting--; WARN_ON_ONCE(rdtp->dynticks_nesting < 0); if (rdtp->dynticks_nesting) - trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting); + trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting); else rcu_eqs_enter_common(rdtp, oldval, true); local_irq_restore(flags); @@ -499,11 +517,11 @@ static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, smp_mb__after_atomic_inc(); /* See above. */ WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); rcu_cleanup_after_idle(smp_processor_id()); - trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting); + trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting); if (!user && !is_idle_task(current)) { struct task_struct *idle = idle_task(smp_processor_id()); - trace_rcu_dyntick("Error on exit: not idle task", + trace_rcu_dyntick(TPS("Error on exit: not idle task"), oldval, rdtp->dynticks_nesting); ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", @@ -618,7 +636,7 @@ void rcu_irq_enter(void) rdtp->dynticks_nesting++; WARN_ON_ONCE(rdtp->dynticks_nesting == 0); if (oldval) - trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting); + trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting); else rcu_eqs_exit_common(rdtp, oldval, true); local_irq_restore(flags); @@ -773,7 +791,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) * of the current RCU grace period. */ if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) { - trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti"); + trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); rdp->dynticks_fqs++; return 1; } @@ -793,7 +811,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) return 0; /* Grace period is not old enough. */ barrier(); if (cpu_is_offline(rdp->cpu)) { - trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); + trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl")); rdp->offline_fqs++; return 1; } @@ -1056,9 +1074,9 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) * grace period is already marked as needed, return to the caller. */ c = rcu_cbs_completed(rdp->rsp, rnp); - trace_rcu_future_gp(rnp, rdp, c, "Startleaf"); + trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf")); if (rnp->need_future_gp[c & 0x1]) { - trace_rcu_future_gp(rnp, rdp, c, "Prestartleaf"); + trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf")); return c; } @@ -1072,7 +1090,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) if (rnp->gpnum != rnp->completed || ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) { rnp->need_future_gp[c & 0x1]++; - trace_rcu_future_gp(rnp, rdp, c, "Startedleaf"); + trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf")); return c; } @@ -1100,7 +1118,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) * recorded, trace and leave. */ if (rnp_root->need_future_gp[c & 0x1]) { - trace_rcu_future_gp(rnp, rdp, c, "Prestartedroot"); + trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot")); goto unlock_out; } @@ -1109,9 +1127,9 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) /* If a grace period is not already in progress, start one. */ if (rnp_root->gpnum != rnp_root->completed) { - trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot"); + trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot")); } else { - trace_rcu_future_gp(rnp, rdp, c, "Startedroot"); + trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot")); rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp); } unlock_out: @@ -1135,7 +1153,8 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) rcu_nocb_gp_cleanup(rsp, rnp); rnp->need_future_gp[c & 0x1] = 0; needmore = rnp->need_future_gp[(c + 1) & 0x1]; - trace_rcu_future_gp(rnp, rdp, c, needmore ? "CleanupMore" : "Cleanup"); + trace_rcu_future_gp(rnp, rdp, c, + needmore ? TPS("CleanupMore") : TPS("Cleanup")); return needmore; } @@ -1203,9 +1222,9 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, /* Trace depending on how much we were able to accelerate. */ if (!*rdp->nxttail[RCU_WAIT_TAIL]) - trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccWaitCB"); + trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB")); else - trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccReadyCB"); + trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB")); } /* @@ -1271,7 +1290,7 @@ static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struc /* Remember that we saw this grace-period completion. */ rdp->completed = rnp->completed; - trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend"); + trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend")); } if (rdp->gpnum != rnp->gpnum) { @@ -1281,7 +1300,7 @@ static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struc * go looking for one. */ rdp->gpnum = rnp->gpnum; - trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart"); + trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart")); rdp->passed_quiesce = 0; rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); zero_cpu_stall_ticks(rdp); @@ -1324,7 +1343,7 @@ static int rcu_gp_init(struct rcu_state *rsp) /* Advance to a new grace period and initialize state. */ rsp->gpnum++; - trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); + trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); record_gp_stall_check_time(rsp); raw_spin_unlock_irq(&rnp->lock); @@ -1446,7 +1465,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) rcu_nocb_gp_set(rnp, nocb); rsp->completed = rsp->gpnum; /* Declare grace period done. */ - trace_rcu_grace_period(rsp->name, rsp->completed, "end"); + trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); rsp->fqs_state = RCU_GP_IDLE; rdp = this_cpu_ptr(rsp->rda); rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */ @@ -1855,7 +1874,7 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) RCU_TRACE(mask = rdp->grpmask); trace_rcu_grace_period(rsp->name, rnp->gpnum + 1 - !!(rnp->qsmask & mask), - "cpuofl"); + TPS("cpuofl")); } /* @@ -2042,7 +2061,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) */ void rcu_check_callbacks(int cpu, int user) { - trace_rcu_utilization("Start scheduler-tick"); + trace_rcu_utilization(TPS("Start scheduler-tick")); increment_cpu_stall_ticks(); if (user || rcu_is_cpu_rrupt_from_idle()) { @@ -2075,7 +2094,7 @@ void rcu_check_callbacks(int cpu, int user) rcu_preempt_check_callbacks(cpu); if (rcu_pending(cpu)) invoke_rcu_core(); - trace_rcu_utilization("End scheduler-tick"); + trace_rcu_utilization(TPS("End scheduler-tick")); } /* @@ -2206,10 +2225,10 @@ static void rcu_process_callbacks(struct softirq_action *unused) if (cpu_is_offline(smp_processor_id())) return; - trace_rcu_utilization("Start RCU core"); + trace_rcu_utilization(TPS("Start RCU core")); for_each_rcu_flavor(rsp) __rcu_process_callbacks(rsp); - trace_rcu_utilization("End RCU core"); + trace_rcu_utilization(TPS("End RCU core")); } /* @@ -2950,7 +2969,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rdp->completed = rnp->completed; rdp->passed_quiesce = 0; rdp->qs_pending = 0; - trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl"); + trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); } raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ rnp = rnp->parent; @@ -2980,7 +2999,7 @@ static int rcu_cpu_notify(struct notifier_block *self, struct rcu_node *rnp = rdp->mynode; struct rcu_state *rsp; - trace_rcu_utilization("Start CPU hotplug"); + trace_rcu_utilization(TPS("Start CPU hotplug")); switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: @@ -3009,7 +3028,7 @@ static int rcu_cpu_notify(struct notifier_block *self, default: break; } - trace_rcu_utilization("End CPU hotplug"); + trace_rcu_utilization(TPS("End CPU hotplug")); return NOTIFY_OK; } -- cgit v1.2.3 From d1d74d14e98a6be740a6f12456c7d9ad47be9c9c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 22 Apr 2013 00:12:42 +0200 Subject: rcu: Expedite grace periods during suspend/resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_RCU_FAST_NO_HZ can increase grace-period durations by up to a factor of four, which can result in long suspend and resume times. Thus, this commit temporarily switches to expedited grace periods when suspending the box and return to normal settings when resuming. Similar logic is applied to hibernation. Because expedited grace periods are of dubious benefit on very large systems, so this commit restricts their automated use during suspend and resume to systems of 256 or fewer CPUs. (Some day a number of Linux-kernel facilities, including RCU's expedited grace periods, will be more scalable, but I need to see bug reports first.) [ paulmck: This also papers over an audio/irq bug, but hopefully that will be fixed soon. ] Signed-off-by: Borislav Petkov Signed-off-by: Bjørn Mork Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 338f1d1c1c66..a7bf517b0482 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -54,6 +54,7 @@ #include #include #include +#include #include "rcutree.h" #include @@ -3032,6 +3033,25 @@ static int rcu_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } +static int rcu_pm_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + switch (action) { + case PM_HIBERNATION_PREPARE: + case PM_SUSPEND_PREPARE: + if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */ + rcu_expedited = 1; + break; + case PM_POST_HIBERNATION: + case PM_POST_SUSPEND: + rcu_expedited = 0; + break; + default: + break; + } + return NOTIFY_OK; +} + /* * Spawn the kthread that handles this RCU flavor's grace periods. */ @@ -3273,6 +3293,7 @@ void __init rcu_init(void) * or the scheduler are operational. */ cpu_notifier(rcu_cpu_notify, 0); + pm_notifier(rcu_pm_notify, 0); for_each_online_cpu(cpu) rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); } -- cgit v1.2.3 From ae15018456c44b742d352af323e0b89eae4a6383 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 23 Apr 2013 13:20:57 -0700 Subject: rcu: Make call_rcu() leak callbacks for debug-object errors If someone does a duplicate call_rcu(), the worst thing the second call_rcu() could do would be to actually queue the callback the second time because doing so corrupts whatever list the callback was already queued on. This commit therefore makes __call_rcu() check the new return value from debug-objects and leak the callback upon error. This commit also substitutes rcu_leak_callback() for whatever callback function was previously in place in order to avoid freeing the callback out from under any readers that might still be referencing it. These changes increase the probability that the debug-objects error messages will actually make it somewhere visible. Signed-off-by: Paul E. McKenney Cc: Mathieu Desnoyers Cc: Sedat Dilek Cc: Davidlohr Bueso Cc: Rik van Riel Cc: Thomas Gleixner Cc: Linus Torvalds Tested-by: Sedat Dilek Reviewed-by: Josh Triplett --- kernel/rcutree.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index a7bf517b0482..91840566e294 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -2304,6 +2304,13 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, } } +/* + * RCU callback function to leak a callback. + */ +static void rcu_leak_callback(struct rcu_head *rhp) +{ +} + /* * Helper function for call_rcu() and friends. The cpu argument will * normally be -1, indicating "currently running CPU". It may specify @@ -2318,7 +2325,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_data *rdp; WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */ - debug_rcu_head_queue(head); + if (debug_rcu_head_queue(head)) { + /* Probable double call_rcu(), so leak the callback. */ + ACCESS_ONCE(head->func) = rcu_leak_callback; + WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n"); + return; + } head->func = func; head->next = NULL; -- cgit v1.2.3 From 1eafd31c640d6799c63136246a59d608bed93c74 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 20 Jun 2013 13:50:40 -0700 Subject: rcu: Avoid redundant grace-period kthread wakeups When setting up an in-the-future "advanced" grace period, the code needs to wake up the relevant grace-period kthread, which it currently does unconditionally. However, this results in needless wakeups in the case where the advanced grace period is being set up by the grace-period kthread itself, which is a non-uncommon situation. This commit therefore checks to see if the running thread is the grace-period kthread, and avoids doing the irq_work_queue()-mediated wakeup in that case. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 91840566e294..c6a064abd6a0 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1576,10 +1576,12 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, /* * We can't do wakeups while holding the rnp->lock, as that - * could cause possible deadlocks with the rq->lock. Deter - * the wakeup to interrupt context. + * could cause possible deadlocks with the rq->lock. Defer + * the wakeup to interrupt context. And don't bother waking + * up the running kthread. */ - irq_work_queue(&rsp->wakeup_work); + if (current != rsp->gp_kthread) + irq_work_queue(&rsp->wakeup_work); } /* -- cgit v1.2.3 From feed66ed26a53e700ca02ce1744fed7d0c647292 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 9 May 2013 08:55:54 -0700 Subject: rcu: Eliminate unused APIs intended for adaptive ticks The rcu_user_enter_after_irq() and rcu_user_exit_after_irq() functions were intended for use by adaptive ticks, but changes in implementation have rendered them unnecessary. This commit therefore removes them. Reported-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 43 ------------------------------------------- 1 file changed, 43 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 338f1d1c1c66..8807019138c6 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -444,27 +444,6 @@ void rcu_user_enter(void) { rcu_eqs_enter(1); } - -/** - * rcu_user_enter_after_irq - inform RCU that we are going to resume userspace - * after the current irq returns. - * - * This is similar to rcu_user_enter() but in the context of a non-nesting - * irq. After this call, RCU enters into idle mode when the interrupt - * returns. - */ -void rcu_user_enter_after_irq(void) -{ - unsigned long flags; - struct rcu_dynticks *rdtp; - - local_irq_save(flags); - rdtp = &__get_cpu_var(rcu_dynticks); - /* Ensure this irq is interrupting a non-idle RCU state. */ - WARN_ON_ONCE(!(rdtp->dynticks_nesting & DYNTICK_TASK_MASK)); - rdtp->dynticks_nesting = 1; - local_irq_restore(flags); -} #endif /* CONFIG_RCU_USER_QS */ /** @@ -581,28 +560,6 @@ void rcu_user_exit(void) { rcu_eqs_exit(1); } - -/** - * rcu_user_exit_after_irq - inform RCU that we won't resume to userspace - * idle mode after the current non-nesting irq returns. - * - * This is similar to rcu_user_exit() but in the context of an irq. - * This is called when the irq has interrupted a userspace RCU idle mode - * context. When the current non-nesting interrupt returns after this call, - * the CPU won't restore the RCU idle mode. - */ -void rcu_user_exit_after_irq(void) -{ - unsigned long flags; - struct rcu_dynticks *rdtp; - - local_irq_save(flags); - rdtp = &__get_cpu_var(rcu_dynticks); - /* Ensure we are interrupting an RCU idle mode. */ - WARN_ON_ONCE(rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK); - rdtp->dynticks_nesting += DYNTICK_TASK_EXIT_IDLE; - local_irq_restore(flags); -} #endif /* CONFIG_RCU_USER_QS */ /** -- cgit v1.2.3 From 2333210b26cf7aaf48d71343029afb860103d9f9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 21 Jun 2013 12:34:33 -0700 Subject: nohz_full: Add rcu_dyntick data for scalable detection of all-idle state This commit adds fields to the rcu_dyntick structure that are used to detect idle CPUs. These new fields differ from the existing ones in that the existing ones consider a CPU executing in user mode to be idle, where the new ones consider CPUs executing in user mode to be busy. The handling of these new fields is otherwise quite similar to that for the exiting fields. This commit also adds the initialization required for these fields. So, why is usermode execution treated differently, with RCU considering it a quiescent state equivalent to idle, while in contrast the new full-system idle state detection considers usermode execution to be non-idle? It turns out that although one of RCU's quiescent states is usermode execution, it is not a full-system idle state. This is because the purpose of the full-system idle state is not RCU, but rather determining when accurate timekeeping can safely be disabled. Whenever accurate timekeeping is required in a CONFIG_NO_HZ_FULL kernel, at least one CPU must keep the scheduling-clock tick going. If even one CPU is executing in user mode, accurate timekeeping is requires, particularly for architectures where gettimeofday() and friends do not enter the kernel. Only when all CPUs are really and truly idle can accurate timekeeping be disabled, allowing all CPUs to turn off the scheduling clock interrupt, thus greatly improving energy efficiency. This naturally raises the question "Why is this code in RCU rather than in timekeeping?", and the answer is that RCU has the data and infrastructure to efficiently make this determination. Signed-off-by: Paul E. McKenney Acked-by: Frederic Weisbecker Cc: Steven Rostedt Reviewed-by: Josh Triplett --- kernel/rcutree.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 8807019138c6..4f27b85d8c86 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -224,6 +224,10 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch); DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, .dynticks = ATOMIC_INIT(1), +#ifdef CONFIG_NO_HZ_FULL_SYSIDLE + .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE, + .dynticks_idle = ATOMIC_INIT(1), +#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ }; static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */ @@ -2904,6 +2908,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rdp->blimit = blimit; init_callback_list(rdp); /* Re-enable callbacks on this CPU. */ rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; + rcu_sysidle_init_percpu_data(rdp->dynticks); atomic_set(&rdp->dynticks->dynticks, (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ -- cgit v1.2.3 From eb348b898290da242e46df75ab0b9772003e08b8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 21 Jun 2013 13:00:57 -0700 Subject: nohz_full: Add per-CPU idle-state tracking This commit adds the code that updates the rcu_dyntick structure's new fields to track the per-CPU idle state based on interrupts and transitions into and out of the idle loop (NMIs are ignored because NMI handlers cannot cleanly read out the time anyway). This code is similar to the code that maintains RCU's idea of per-CPU idleness, but differs in that RCU treats CPUs running in user mode as idle, where this new code does not. Signed-off-by: Paul E. McKenney Acked-by: Frederic Weisbecker Cc: Steven Rostedt Reviewed-by: Josh Triplett --- kernel/rcutree.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4f27b85d8c86..b0d2cc3ea15a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -431,6 +431,7 @@ void rcu_idle_enter(void) local_irq_save(flags); rcu_eqs_enter(false); + rcu_sysidle_enter(&__get_cpu_var(rcu_dynticks), 0); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_idle_enter); @@ -481,6 +482,7 @@ void rcu_irq_exit(void) trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting); else rcu_eqs_enter_common(rdtp, oldval, true); + rcu_sysidle_enter(rdtp, 1); local_irq_restore(flags); } @@ -549,6 +551,7 @@ void rcu_idle_exit(void) local_irq_save(flags); rcu_eqs_exit(false); + rcu_sysidle_exit(&__get_cpu_var(rcu_dynticks), 0); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_idle_exit); @@ -600,6 +603,7 @@ void rcu_irq_enter(void) trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting); else rcu_eqs_exit_common(rdtp, oldval, true); + rcu_sysidle_exit(rdtp, 1); local_irq_restore(flags); } -- cgit v1.2.3 From 217af2a2ffbfc1498d1cf3a89fa478b5632df8f7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 21 Jun 2013 15:39:06 -0700 Subject: nohz_full: Add full-system-idle arguments to API This commit adds an isidle and jiffies argument to force_qs_rnp(), dyntick_save_progress_counter(), and rcu_implicit_dynticks_qs() to enable RCU's force-quiescent-state process to check for full-system idle. Signed-off-by: Paul E. McKenney Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Lai Jiangshan [ paulmck: Use true and false for boolean constants per Lai Jiangshan. ] Reviewed-by: Josh Triplett --- kernel/rcutree.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index b0d2cc3ea15a..7b5be56d95ae 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -246,7 +246,10 @@ module_param(jiffies_till_next_fqs, ulong, 0644); static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp); -static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)); +static void force_qs_rnp(struct rcu_state *rsp, + int (*f)(struct rcu_data *rsp, bool *isidle, + unsigned long *maxj), + bool *isidle, unsigned long *maxj); static void force_quiescent_state(struct rcu_state *rsp); static int rcu_pending(int cpu); @@ -727,7 +730,8 @@ static int rcu_is_cpu_rrupt_from_idle(void) * credit them with an implicit quiescent state. Return 1 if this CPU * is in dynticks idle mode, which is an extended quiescent state. */ -static int dyntick_save_progress_counter(struct rcu_data *rdp) +static int dyntick_save_progress_counter(struct rcu_data *rdp, + bool *isidle, unsigned long *maxj) { rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); return (rdp->dynticks_snap & 0x1) == 0; @@ -739,7 +743,8 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) * idle state since the last call to dyntick_save_progress_counter() * for this same CPU, or by virtue of having been offline. */ -static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) +static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, + bool *isidle, unsigned long *maxj) { unsigned int curr; unsigned int snap; @@ -1361,16 +1366,19 @@ static int rcu_gp_init(struct rcu_state *rsp) int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) { int fqs_state = fqs_state_in; + bool isidle = false; + unsigned long maxj; struct rcu_node *rnp = rcu_get_root(rsp); rsp->n_force_qs++; if (fqs_state == RCU_SAVE_DYNTICK) { /* Collect dyntick-idle snapshots. */ - force_qs_rnp(rsp, dyntick_save_progress_counter); + force_qs_rnp(rsp, dyntick_save_progress_counter, + &isidle, &maxj); fqs_state = RCU_FORCE_QS; } else { /* Handle dyntick-idle and offline CPUs. */ - force_qs_rnp(rsp, rcu_implicit_dynticks_qs); + force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj); } /* Clear flag to prevent immediate re-entry. */ if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { @@ -2069,7 +2077,10 @@ void rcu_check_callbacks(int cpu, int user) * * The caller must have suppressed start of new grace periods. */ -static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) +static void force_qs_rnp(struct rcu_state *rsp, + int (*f)(struct rcu_data *rsp, bool *isidle, + unsigned long *maxj), + bool *isidle, unsigned long *maxj) { unsigned long bit; int cpu; @@ -2093,7 +2104,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) bit = 1; for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { if ((rnp->qsmask & bit) != 0 && - f(per_cpu_ptr(rsp->rda, cpu))) + f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) mask |= bit; } if (mask != 0) { -- cgit v1.2.3 From 458fb381eacdd23366cfa2fbdf5a467848683e3a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 26 Jul 2013 20:47:42 -0700 Subject: rcu: Simplify _rcu_barrier() processing This commit drops an unneeded ACCESS_ONCE() and simplifies an "our work is done" check in _rcu_barrier(). This applies feedback from Linus (https://lkml.org/lkml/2013/7/26/777) that he gave to similar code in an unrelated patch. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett [ paulmck: Fix comment to match code, reported by Lai Jiangshan. ] --- kernel/rcutree.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index c6a064abd6a0..a4a04f311cfb 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -2817,9 +2817,20 @@ static void _rcu_barrier(struct rcu_state *rsp) * transition. The "if" expression below therefore rounds the old * value up to the next even number and adds two before comparing. */ - snap_done = ACCESS_ONCE(rsp->n_barrier_done); + snap_done = rsp->n_barrier_done; _rcu_barrier_trace(rsp, "Check", -1, snap_done); - if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) { + + /* + * If the value in snap is odd, we needed to wait for the current + * rcu_barrier() to complete, then wait for the next one, in other + * words, we need the value of snap_done to be three larger than + * the value of snap. On the other hand, if the value in snap is + * even, we only had to wait for the next rcu_barrier() to complete, + * in other words, we need the value of snap_done to be only two + * greater than the value of snap. The "(snap + 3) & ~0x1" computes + * this for us (thank you, Linus!). + */ + if (ULONG_CMP_GE(snap_done, (snap + 3) & ~0x1)) { _rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done); smp_mb(); /* caller's subsequent code after above check. */ mutex_unlock(&rsp->barrier_mutex); -- cgit v1.2.3 From 0edd1b1784cbdad55aca2c1293be018f53c0ab1d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 21 Jun 2013 16:37:22 -0700 Subject: nohz_full: Add full-system-idle state machine This commit adds the state machine that takes the per-CPU idle data as input and produces a full-system-idle indication as output. This state machine is driven out of RCU's quiescent-state-forcing mechanism, which invokes rcu_sysidle_check_cpu() to collect per-CPU idle state and then rcu_sysidle_report() to drive the state machine. The full-system-idle state is sampled using rcu_sys_is_idle(), which also drives the state machine if RCU is idle (and does so by forcing RCU to become non-idle). This function returns true if all but the timekeeping CPU (tick_do_timer_cpu) are idle and have been idle long enough to avoid memory contention on the full_sysidle_state state variable. The rcu_sysidle_force_exit() may be called externally to reset the state machine back into non-idle state. For large systems the state machine is driven out of RCU's force-quiescent-state logic, which provides good scalability at the price of millisecond-scale latencies on the transition to full-system-idle state. This is not so good for battery-powered systems, which are usually small enough that they don't need to care about scalability, but which do care deeply about energy efficiency. Small systems therefore drive the state machine directly out of the idle-entry code. The number of CPUs in a "small" system is defined by a new NO_HZ_FULL_SYSIDLE_SMALL Kconfig parameter, which defaults to 8. Note that this is a build-time definition. Signed-off-by: Paul E. McKenney Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Lai Jiangshan [ paulmck: Use true and false for boolean constants per Lai Jiangshan. ] Reviewed-by: Josh Triplett [ paulmck: Simplify logic and provide better comments for memory barriers, based on review comments and questions by Lai Jiangshan. ] --- kernel/rcutree.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 7b5be56d95ae..eca70f4469c1 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -734,6 +734,7 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp, bool *isidle, unsigned long *maxj) { rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); + rcu_sysidle_check_cpu(rdp, isidle, maxj); return (rdp->dynticks_snap & 0x1) == 0; } @@ -1373,11 +1374,17 @@ int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) rsp->n_force_qs++; if (fqs_state == RCU_SAVE_DYNTICK) { /* Collect dyntick-idle snapshots. */ + if (is_sysidle_rcu_state(rsp)) { + isidle = 1; + maxj = jiffies - ULONG_MAX / 4; + } force_qs_rnp(rsp, dyntick_save_progress_counter, &isidle, &maxj); + rcu_sysidle_report_gp(rsp, isidle, maxj); fqs_state = RCU_FORCE_QS; } else { /* Handle dyntick-idle and offline CPUs. */ + isidle = 0; force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj); } /* Clear flag to prevent immediate re-entry. */ @@ -2103,9 +2110,12 @@ static void force_qs_rnp(struct rcu_state *rsp, cpu = rnp->grplo; bit = 1; for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { - if ((rnp->qsmask & bit) != 0 && - f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) - mask |= bit; + if ((rnp->qsmask & bit) != 0) { + if ((rnp->qsmaskinit & bit) != 0) + *isidle = 0; + if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) + mask |= bit; + } } if (mask != 0) { -- cgit v1.2.3 From eb75767be0e514f97bf1b5cec763696cfc7f7e2a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 21 Jun 2013 17:10:40 -0700 Subject: nohz_full: Force RCU's grace-period kthreads onto timekeeping CPU Because RCU's quiescent-state-forcing mechanism is used to drive the full-system-idle state machine, and because this mechanism is executed by RCU's grace-period kthreads, this commit forces these kthreads to run on the timekeeping CPU (tick_do_timer_cpu). To do otherwise would mean that the RCU grace-period kthreads would force the system into non-idle state every time they drove the state machine, which would be just a bit on the futile side. Signed-off-by: Paul E. McKenney Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Lai Jiangshan Reviewed-by: Josh Triplett --- kernel/rcutree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index eca70f4469c1..64eaafb6c8f7 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1303,6 +1303,7 @@ static int rcu_gp_init(struct rcu_state *rsp) struct rcu_data *rdp; struct rcu_node *rnp = rcu_get_root(rsp); + rcu_bind_gp_kthread(); raw_spin_lock_irq(&rnp->lock); rsp->gp_flags = 0; /* Clear all flags: New grace period. */ -- cgit v1.2.3