From 1717f2096b543cede7a380c858c765c41936bc35 Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Mon, 14 Dec 2015 11:19:09 +0100 Subject: panic, x86: Fix re-entrance problem due to panic on NMI If panic on NMI happens just after panic() on the same CPU, panic() is recursively called. Kernel stalls, as a result, after failing to acquire panic_lock. To avoid this problem, don't call panic() in NMI context if we've already entered panic(). For that, introduce nmi_panic() macro to reduce code duplication. In the case of panic on NMI, don't return from NMI handlers if another CPU already panicked. Signed-off-by: Hidehiro Kawai Acked-by: Michal Hocko Cc: Aaron Tomlin Cc: Andrew Morton Cc: Andy Lutomirski Cc: Baoquan He Cc: Chris Metcalf Cc: David Hildenbrand Cc: Don Zickus Cc: "Eric W. Biederman" Cc: Frederic Weisbecker Cc: Gobinda Charan Maji Cc: HATAYAMA Daisuke Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Javi Merino Cc: Jonathan Corbet Cc: kexec@lists.infradead.org Cc: linux-doc@vger.kernel.org Cc: lkml Cc: Masami Hiramatsu Cc: Michal Nazarewicz Cc: Nicolas Iooss Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Rasmus Villemoes Cc: Rusty Russell Cc: Seth Jennings Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Ulrich Obergfell Cc: Vitaly Kuznetsov Cc: Vivek Goyal Link: http://lkml.kernel.org/r/20151210014626.25437.13302.stgit@softrs [ Cleanup comments, fixup formatting. ] Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- include/linux/kernel.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux/kernel.h') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 350dfb08aee3..750cc5c7c999 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -445,6 +445,26 @@ extern int sysctl_panic_on_stackoverflow; extern bool crash_kexec_post_notifiers; +/* + * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It + * holds a CPU number which is executing panic() currently. A value of + * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec(). + */ +extern atomic_t panic_cpu; +#define PANIC_CPU_INVALID -1 + +/* + * A variant of panic() called from NMI context. We return if we've already + * panicked on this CPU. + */ +#define nmi_panic(fmt, ...) \ +do { \ + int cpu = raw_smp_processor_id(); \ + \ + if (atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu) != cpu) \ + panic(fmt, ##__VA_ARGS__); \ +} while (0) + /* * Only to be used by arch init code. If the user over-wrote the default * CONFIG_PANIC_TIMEOUT, honor it. -- cgit v1.2.3 From 58c5661f2144c089bbc2e5d87c9ec1dc1d2964fe Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Mon, 14 Dec 2015 11:19:10 +0100 Subject: panic, x86: Allow CPUs to save registers even if looping in NMI context Currently, kdump_nmi_shootdown_cpus(), a subroutine of crash_kexec(), sends an NMI IPI to CPUs which haven't called panic() to stop them, save their register information and do some cleanups for crash dumping. However, if such a CPU is infinitely looping in NMI context, we fail to save its register information into the crash dump. For example, this can happen when unknown NMIs are broadcast to all CPUs as follows: CPU 0 CPU 1 =========================== ========================== receive an unknown NMI unknown_nmi_error() panic() receive an unknown NMI spin_trylock(&panic_lock) unknown_nmi_error() crash_kexec() panic() spin_trylock(&panic_lock) panic_smp_self_stop() infinite loop kdump_nmi_shootdown_cpus() issue NMI IPI -----------> blocked until IRET infinite loop... Here, since CPU 1 is in NMI context, the second NMI from CPU 0 is blocked until CPU 1 executes IRET. However, CPU 1 never executes IRET, so the NMI is not handled and the callback function to save registers is never called. In practice, this can happen on some servers which broadcast NMIs to all CPUs when the NMI button is pushed. To save registers in this case, we need to: a) Return from NMI handler instead of looping infinitely or b) Call the callback function directly from the infinite loop Inherently, a) is risky because NMI is also used to prevent corrupted data from being propagated to devices. So, we chose b). This patch does the following: 1. Move the infinite looping of CPUs which haven't called panic() in NMI context (actually done by panic_smp_self_stop()) outside of panic() to enable us to refer pt_regs. Please note that panic_smp_self_stop() is still used for normal context. 2. Call a callback of kdump_nmi_shootdown_cpus() directly to save registers and do some cleanups after setting waiting_for_crash_ipi which is used for counting down the number of CPUs which handled the callback Signed-off-by: Hidehiro Kawai Acked-by: Michal Hocko Cc: Aaron Tomlin Cc: Andrew Morton Cc: Andy Lutomirski Cc: Baoquan He Cc: Chris Metcalf Cc: Dave Young Cc: David Hildenbrand Cc: Don Zickus Cc: Eric Biederman Cc: Frederic Weisbecker Cc: Gobinda Charan Maji Cc: HATAYAMA Daisuke Cc: Hidehiro Kawai Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Javi Merino Cc: Jiang Liu Cc: Jonathan Corbet Cc: kexec@lists.infradead.org Cc: linux-doc@vger.kernel.org Cc: lkml Cc: Masami Hiramatsu Cc: Michal Nazarewicz Cc: Nicolas Iooss Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Rasmus Villemoes Cc: Seth Jennings Cc: Stefan Lippers-Hollmann Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Ulrich Obergfell Cc: Vitaly Kuznetsov Cc: Vivek Goyal Cc: Yasuaki Ishimatsu Link: http://lkml.kernel.org/r/20151210014628.25437.75256.stgit@softrs [ Cleanup comments, fixup formatting. ] Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- include/linux/kernel.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 750cc5c7c999..7311c3294e25 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -255,6 +255,7 @@ extern long (*panic_blink)(int state); __printf(1, 2) void panic(const char *fmt, ...) __noreturn __cold; +void nmi_panic_self_stop(struct pt_regs *); extern void oops_enter(void); extern void oops_exit(void); void print_oops_end_marker(void); @@ -455,14 +456,21 @@ extern atomic_t panic_cpu; /* * A variant of panic() called from NMI context. We return if we've already - * panicked on this CPU. + * panicked on this CPU. If another CPU already panicked, loop in + * nmi_panic_self_stop() which can provide architecture dependent code such + * as saving register state for crash dump. */ -#define nmi_panic(fmt, ...) \ +#define nmi_panic(regs, fmt, ...) \ do { \ - int cpu = raw_smp_processor_id(); \ + int old_cpu, cpu; \ \ - if (atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu) != cpu) \ + cpu = raw_smp_processor_id(); \ + old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu); \ + \ + if (old_cpu == PANIC_CPU_INVALID) \ panic(fmt, ##__VA_ARGS__); \ + else if (old_cpu != cpu) \ + nmi_panic_self_stop(regs); \ } while (0) /* -- cgit v1.2.3 From 8f57e4d930d48217268315898212518d4d3e0773 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Fri, 15 Jan 2016 16:57:58 -0800 Subject: include/linux/kernel.h: change abs() macro so it uses consistent return type Rewrite abs() so that its return type does not depend on the architecture and no unexpected type conversion happen inside of it. The only conversion is from unsigned to signed type. char is left as a return type but treated as a signed type regradless of it's actual signedness. With the old version, int arguments were promoted to long and depending on architecture a long argument might result in s64 or long return type (which may or may not be the same). This came after some back and forth with Nicolas. The current macro has different return type (for the same input type) depending on architecture which might be midly iritating. An alternative version would promote to int like so: #define abs(x) __abs_choose_expr(x, long long, \ __abs_choose_expr(x, long, \ __builtin_choose_expr( \ sizeof(x) <= sizeof(int), \ ({ int __x = (x); __x<0?-__x:__x; }), \ ((void)0)))) I have no preference but imagine Linus might. :] Nicolas argument against is that promoting to int causes iconsistent behaviour: int main(void) { unsigned short a = 0, b = 1, c = a - b; unsigned short d = abs(a - b); unsigned short e = abs(c); printf("%u %u\n", d, e); // prints: 1 65535 } Then again, no sane person expects consistent behaviour from C integer arithmetic. ;) Note: __builtin_types_compatible_p(unsigned char, char) is always false, and __builtin_types_compatible_p(signed char, char) is also always false. Signed-off-by: Michal Nazarewicz Reviewed-by: Nicolas Pitre Cc: Srinivas Pandruvada Cc: Wey-Yi Guy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 7311c3294e25..f31638c6e873 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -202,26 +202,26 @@ extern int _cond_resched(void); /** * abs - return absolute value of an argument - * @x: the value. If it is unsigned type, it is converted to signed type first - * (s64, long or int depending on its size). + * @x: the value. If it is unsigned type, it is converted to signed type first. + * char is treated as if it was signed (regardless of whether it really is) + * but the macro's return type is preserved as char. * - * Return: an absolute value of x. If x is 64-bit, macro's return type is s64, - * otherwise it is signed long. + * Return: an absolute value of x. */ -#define abs(x) __builtin_choose_expr(sizeof(x) == sizeof(s64), ({ \ - s64 __x = (x); \ - (__x < 0) ? -__x : __x; \ - }), ({ \ - long ret; \ - if (sizeof(x) == sizeof(long)) { \ - long __x = (x); \ - ret = (__x < 0) ? -__x : __x; \ - } else { \ - int __x = (x); \ - ret = (__x < 0) ? -__x : __x; \ - } \ - ret; \ - })) +#define abs(x) __abs_choose_expr(x, long long, \ + __abs_choose_expr(x, long, \ + __abs_choose_expr(x, int, \ + __abs_choose_expr(x, short, \ + __abs_choose_expr(x, char, \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(x), char), \ + (char)({ signed char __x = (x); __x<0?-__x:__x; }), \ + ((void)0))))))) + +#define __abs_choose_expr(x, type, other) __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(x), signed type) || \ + __builtin_types_compatible_p(typeof(x), unsigned type), \ + ({ signed type __x = (x); __x < 0 ? -__x : __x; }), other) /** * reciprocal_scale - "scale" a value into range [0, ep_ro) -- cgit v1.2.3