From 771dae81896855d25f7f8746aaf56c0238deafb6 Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Wed, 30 Nov 2011 02:46:31 +0000 Subject: powerpc/cpuidle: Add cpu_idle_wait() to allow switching of idle routines This patch provides cpu_idle_wait() routine for the powerpc platform which is required by the cpuidle subsystem. This routine is required to change the idle handler on SMP systems. The equivalent routine for x86 is in arch/x86/kernel/process.c but the powerpc implementation is different. cpuidle_disable variable is to enable/disable cpuidle framework if power_save option is set during the boot time. Signed-off-by: Deepthi Dharwar Signed-off-by: Trinabh Gupta Signed-off-by: Arun R Bharadwaj Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/idle.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'arch/powerpc/kernel/idle.c') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 39a2baa6ad58..8574b0e81ff1 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -39,9 +39,13 @@ #define cpu_should_die() 0 #endif +unsigned long cpuidle_disable = IDLE_NO_OVERRIDE; +EXPORT_SYMBOL(cpuidle_disable); + static int __init powersave_off(char *arg) { ppc_md.power_save = NULL; + cpuidle_disable = IDLE_POWERSAVE_OFF; return 0; } __setup("powersave=off", powersave_off); @@ -102,6 +106,29 @@ void cpu_idle(void) } } + +/* + * cpu_idle_wait - Used to ensure that all the CPUs come out of the old + * idle loop and start using the new idle loop. + * Required while changing idle handler on SMP systems. + * Caller must have changed idle handler to the new value before the call. + * This window may be larger on shared systems. + */ +void cpu_idle_wait(void) +{ + int cpu; + smp_mb(); + + /* kick all the CPUs so that they exit out of old idle routine */ + get_online_cpus(); + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + smp_send_reschedule(cpu); + } + put_online_cpus(); +} +EXPORT_SYMBOL_GPL(cpu_idle_wait); + int powersave_nap; #ifdef CONFIG_SYSCTL -- cgit v1.2.3 From 280f06774afedf849f0b34248ed6aff57d0f6908 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 7 Oct 2011 18:22:06 +0200 Subject: nohz: Separate out irq exit and idle loop dyntick logic The tick_nohz_stop_sched_tick() function, which tries to delay the next timer tick as long as possible, can be called from two places: - From the idle loop to start the dytick idle mode - From interrupt exit if we have interrupted the dyntick idle mode, so that we reprogram the next tick event in case the irq changed some internal state that requires this action. There are only few minor differences between both that are handled by that function, driven by the ts->inidle cpu variable and the inidle parameter. The whole guarantees that we only update the dyntick mode on irq exit if we actually interrupted the dyntick idle mode, and that we enter in RCU extended quiescent state from idle loop entry only. Split this function into: - tick_nohz_idle_enter(), which sets ts->inidle to 1, enters dynticks idle mode unconditionally if it can, and enters into RCU extended quiescent state. - tick_nohz_irq_exit() which only updates the dynticks idle mode when ts->inidle is set (ie: if tick_nohz_idle_enter() has been called). To maintain symmetry, tick_nohz_restart_sched_tick() has been renamed into tick_nohz_idle_exit(). This simplifies the code and micro-optimize the irq exit path (no need for local_irq_save there). This also prepares for the split between dynticks and rcu extended quiescent state logics. We'll need this split to further fix illegal uses of RCU in extended quiescent states in the idle loop. Signed-off-by: Frederic Weisbecker Cc: Mike Frysinger Cc: Guan Xuetao Cc: David Miller Cc: Chris Metcalf Cc: Hans-Christian Egtvedt Cc: Ralf Baechle Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Russell King Cc: Paul Mackerras Cc: Heiko Carstens Cc: Paul Mundt Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- arch/powerpc/kernel/idle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel/idle.c') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 39a2baa6ad58..878572f70ac5 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -56,7 +56,7 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -93,7 +93,7 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); -- cgit v1.2.3 From 2bbb6817c0ac1b5f2a68d720f364f98eeb1ac4fd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 8 Oct 2011 16:01:00 +0200 Subject: nohz: Allow rcu extended quiescent state handling seperately from tick stop It is assumed that rcu won't be used once we switch to tickless mode and until we restart the tick. However this is not always true, as in x86-64 where we dereference the idle notifiers after the tick is stopped. To prepare for fixing this, add two new APIs: tick_nohz_idle_enter_norcu() and tick_nohz_idle_exit_norcu(). If no use of RCU is made in the idle loop between tick_nohz_enter_idle() and tick_nohz_exit_idle() calls, the arch must instead call the new *_norcu() version such that the arch doesn't need to call rcu_idle_enter() and rcu_idle_exit(). Otherwise the arch must call tick_nohz_enter_idle() and tick_nohz_exit_idle() and also call explicitly: - rcu_idle_enter() after its last use of RCU before the CPU is put to sleep. - rcu_idle_exit() before the first use of RCU after the CPU is woken up. Signed-off-by: Frederic Weisbecker Cc: Mike Frysinger Cc: Guan Xuetao Cc: David Miller Cc: Chris Metcalf Cc: Hans-Christian Egtvedt Cc: Ralf Baechle Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Russell King Cc: Paul Mackerras Cc: Heiko Carstens Cc: Paul Mundt Signed-off-by: Paul E. McKenney --- arch/powerpc/kernel/idle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel/idle.c') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 878572f70ac5..2e782a36d8f2 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -56,7 +56,7 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -93,7 +93,7 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); -- cgit v1.2.3 From a7b152d5342c06e81ab0cf7d18345f69fa7e56b5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 13 Oct 2011 19:05:12 -0700 Subject: powerpc: Tell RCU about idle after hcall tracing The PowerPC pSeries platform (CONFIG_PPC_PSERIES=y) enables hypervisor-call tracing for CONFIG_TRACEPOINTS=y kernels. One of the hypervisor calls that is traced is the H_CEDE call in the idle loop that tells the hypervisor that this OS instance no longer needs the current CPU. However, tracing uses RCU, so this combination of kernel configuration variables needs to avoid telling RCU about the current CPU's idleness until after the H_CEDE-entry tracing completes on the one hand, and must tell RCU that the the current CPU is no longer idle before the H_CEDE-exit tracing starts. In all other cases, it suffices to inform RCU of CPU idleness upon idle-loop entry and exit. This commit makes the required adjustments. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- arch/powerpc/kernel/idle.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel/idle.c') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 2e782a36d8f2..3cd73d1fc427 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -46,6 +46,12 @@ static int __init powersave_off(char *arg) } __setup("powersave=off", powersave_off); +#if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_TRACEPOINTS) +static const bool idle_uses_rcu = 1; +#else +static const bool idle_uses_rcu; +#endif + /* * The body of the idle task. */ @@ -56,7 +62,10 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_idle_enter_norcu(); + if (idle_uses_rcu) + tick_nohz_idle_enter(); + else + tick_nohz_idle_enter_norcu(); while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -93,7 +102,10 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - tick_nohz_idle_exit_norcu(); + if (idle_uses_rcu) + tick_nohz_idle_exit(); + else + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); -- cgit v1.2.3 From 1268fbc746ea1cd279886a740dcbad4ba5232225 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 17 Nov 2011 18:48:14 +0100 Subject: nohz: Remove tick_nohz_idle_enter_norcu() / tick_nohz_idle_exit_norcu() Those two APIs were provided to optimize the calls of tick_nohz_idle_enter() and rcu_idle_enter() into a single irq disabled section. This way no interrupt happening in-between would needlessly process any RCU job. Now we are talking about an optimization for which benefits have yet to be measured. Let's start simple and completely decouple idle rcu and dyntick idle logics to simplify. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Reviewed-by: Josh Triplett Signed-off-by: Paul E. McKenney --- arch/powerpc/kernel/idle.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel/idle.c') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 3cd73d1fc427..9c3cd490b1bd 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -62,10 +62,10 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - if (idle_uses_rcu) - tick_nohz_idle_enter(); - else - tick_nohz_idle_enter_norcu(); + tick_nohz_idle_enter(); + if (!idle_uses_rcu) + rcu_idle_enter(); + while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -102,10 +102,9 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - if (idle_uses_rcu) - tick_nohz_idle_exit(); - else - tick_nohz_idle_exit_norcu(); + if (!idle_uses_rcu) + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); -- cgit v1.2.3 From a5ccfee05a439b803640e94584056204501db31c Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 9 Jan 2012 14:29:15 +0000 Subject: powerpc: Fix RCU idle and hcall tracing Tracepoints should not be called inside an rcu_idle_enter/rcu_idle_exit region. Since pSeries calls H_CEDE in the idle loop, we were violating this rule. commit a7b152d5342c (powerpc: Tell RCU about idle after hcall tracing) tried to work around it by delaying the rcu_idle_enter until after we called the hcall tracepoint, but there are a number of issues with it. The hcall tracepoint trampoline code is called conditionally when the tracepoint is enabled. If the tracepoint is not enabled we never call rcu_idle_enter. The idle_uses_rcu check was also done at compile time which breaks multiplatform builds. The simple fix is to avoid tracing H_CEDE and rely on other tracepoints and the hypervisor dispatch trace log to work out if we called H_CEDE. This fixes a hang during boot on pSeries. Signed-off-by: Anton Blanchard Acked-by: Paul E. McKenney Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/idle.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/kernel/idle.c') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 7c66ce13da89..0a48bf5db6c8 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -50,12 +50,6 @@ static int __init powersave_off(char *arg) } __setup("powersave=off", powersave_off); -#if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_TRACEPOINTS) -static const bool idle_uses_rcu = 1; -#else -static const bool idle_uses_rcu; -#endif - /* * The body of the idle task. */ @@ -67,8 +61,7 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { tick_nohz_idle_enter(); - if (!idle_uses_rcu) - rcu_idle_enter(); + rcu_idle_enter(); while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -106,8 +99,7 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - if (!idle_uses_rcu) - rcu_idle_exit(); + rcu_idle_exit(); tick_nohz_idle_exit(); preempt_enable_no_resched(); if (cpu_should_die()) -- cgit v1.2.3