From b02fcf9ba1211097754b286043cd87a8b4907e75 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 4 Dec 2017 15:07:09 +0100 Subject: x86/unwinder: Handle stack overflows more gracefully There are at least two unwinder bugs hindering the debugging of stack-overflow crashes: - It doesn't deal gracefully with the case where the stack overflows and the stack pointer itself isn't on a valid stack but the to-be-dereferenced data *is*. - The ORC oops dump code doesn't know how to print partial pt_regs, for the case where if we get an interrupt/exception in *early* entry code before the full pt_regs have been saved. Fix both issues. http://lkml.kernel.org/r/20171126024031.uxi4numpbjm5rlbr@treble Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150605.071425003@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel/dumpstack.c') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index f13b4c00a5de..0bc95be5c638 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -50,6 +50,28 @@ static void printk_stack_address(unsigned long address, int reliable, printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); } +void show_iret_regs(struct pt_regs *regs) +{ + printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip); + printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss, + regs->sp, regs->flags); +} + +static void show_regs_safe(struct stack_info *info, struct pt_regs *regs) +{ + if (on_stack(info, regs, sizeof(*regs))) + __show_regs(regs, 0); + else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET, + IRET_FRAME_SIZE)) { + /* + * When an interrupt or exception occurs in entry code, the + * full pt_regs might not have been saved yet. In that case + * just print the iret frame. + */ + show_iret_regs(regs); + } +} + void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, char *log_lvl) { @@ -94,8 +116,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, if (stack_name) printk("%s <%s>\n", log_lvl, stack_name); - if (regs && on_stack(&stack_info, regs, sizeof(*regs))) - __show_regs(regs, 0); + if (regs) + show_regs_safe(&stack_info, regs); /* * Scan the stack, printing any text addresses we find. At the @@ -119,7 +141,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, /* * Don't print regs->ip again if it was already printed - * by __show_regs() below. + * by show_regs_safe() below. */ if (regs && stack == ®s->ip) goto next; @@ -155,8 +177,8 @@ next: /* if the frame has entry regs, print them */ regs = unwind_get_entry_regs(&state); - if (regs && on_stack(&stack_info, regs, sizeof(*regs))) - __show_regs(regs, 0); + if (regs) + show_regs_safe(&stack_info, regs); } if (stack_name) -- cgit v1.2.3 From 33a2f1a6c4d7c0a02d1c006fb0379cc5ca3b96bb Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:13 +0100 Subject: x86/dumpstack: Add get_stack_info() support for the SYSENTER stack get_stack_info() doesn't currently know about the SYSENTER stack, so unwinding will fail if we entered the kernel on the SYSENTER stack and haven't fully switched off. Teach get_stack_info() about the SYSENTER stack. With future patches applied that run part of the entry code on the SYSENTER stack and introduce an intentional BUG(), I would get: PANIC: double fault, error_code: 0x0 ... RIP: 0010:do_error_trap+0x33/0x1c0 ... Call Trace: Code: ... With this patch, I get: PANIC: double fault, error_code: 0x0 ... Call Trace: ? async_page_fault+0x36/0x60 ? invalid_op+0x22/0x40 ? async_page_fault+0x36/0x60 ? sync_regs+0x3c/0x40 ? sync_regs+0x2e/0x40 ? error_entry+0x6c/0xd0 ? async_page_fault+0x36/0x60 Code: ... which is a lot more informative. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150605.392711508@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/x86/kernel/dumpstack.c') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 0bc95be5c638..a33a1373a252 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -43,6 +43,25 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task, return true; } +bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) +{ + struct tss_struct *tss = this_cpu_ptr(&cpu_tss); + + /* Treat the canary as part of the stack for unwinding purposes. */ + void *begin = &tss->SYSENTER_stack_canary; + void *end = (void *)&tss->SYSENTER_stack + sizeof(tss->SYSENTER_stack); + + if ((void *)stack < begin || (void *)stack >= end) + return false; + + info->type = STACK_TYPE_SYSENTER; + info->begin = begin; + info->end = end; + info->next_sp = NULL; + + return true; +} + static void printk_stack_address(unsigned long address, int reliable, char *log_lvl) { -- cgit v1.2.3 From 6e60e583426c2f8751c22c2dfe5c207083b4483a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:18 +0100 Subject: x86/dumpstack: Handle stack overflow on all stacks We currently special-case stack overflow on the task stack. We're going to start putting special stacks in the fixmap with a custom layout, so they'll have guard pages, too. Teach the unwinder to be able to unwind an overflow of any of the stacks. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150605.802057305@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel/dumpstack.c') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index a33a1373a252..64f8ed2a4827 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -112,24 +112,28 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, * - task stack * - interrupt stack * - HW exception stacks (double fault, nmi, debug, mce) + * - SYSENTER stack * - * x86-32 can have up to three stacks: + * x86-32 can have up to four stacks: * - task stack * - softirq stack * - hardirq stack + * - SYSENTER stack */ for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { const char *stack_name; - /* - * If we overflowed the task stack into a guard page, jump back - * to the bottom of the usable stack. - */ - if (task_stack_page(task) - (void *)stack < PAGE_SIZE) - stack = task_stack_page(task); - - if (get_stack_info(stack, task, &stack_info, &visit_mask)) - break; + if (get_stack_info(stack, task, &stack_info, &visit_mask)) { + /* + * We weren't on a valid stack. It's possible that + * we overflowed a valid stack into a guard page. + * See if the next page up is valid so that we can + * generate some kind of backtrace if this happens. + */ + stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack); + if (get_stack_info(stack, task, &stack_info, &visit_mask)) + break; + } stack_name = stack_type_name(stack_info.type); if (stack_name) -- cgit v1.2.3 From 72f5e08dbba2d01aa90b592cf76c378ea233b00b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:20 +0100 Subject: x86/entry: Remap the TSS into the CPU entry area This has a secondary purpose: it puts the entry stack into a region with a well-controlled layout. A subsequent patch will take advantage of this to streamline the SYSCALL entry code to be able to find it more easily. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150605.962042855@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/dumpstack.c') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 64f8ed2a4827..60267850125e 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -45,7 +45,8 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task, bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) { - struct tss_struct *tss = this_cpu_ptr(&cpu_tss); + int cpu = smp_processor_id(); + struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss; /* Treat the canary as part of the stack for unwinding purposes. */ void *begin = &tss->SYSENTER_stack_canary; -- cgit v1.2.3 From 7fbbd5cbebf118a9e09f5453f686656a167c3d1c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:27 +0100 Subject: x86/entry/64: Remove the SYSENTER stack canary Now that the SYSENTER stack has a guard page, there's no need for a canary to detect overflow after the fact. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150606.572577316@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel/dumpstack.c') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 60267850125e..ae1ce2e3f132 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -48,8 +48,7 @@ bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) int cpu = smp_processor_id(); struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss; - /* Treat the canary as part of the stack for unwinding purposes. */ - void *begin = &tss->SYSENTER_stack_canary; + void *begin = &tss->SYSENTER_stack; void *end = (void *)&tss->SYSENTER_stack + sizeof(tss->SYSENTER_stack); if ((void *)stack < begin || (void *)stack >= end) -- cgit v1.2.3 From 0f9a48100fba3f189724ae88a450c2261bf91c80 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:28 +0100 Subject: x86/entry: Clean up the SYSENTER_stack code The existing code was a mess, mainly because C arrays are nasty. Turn SYSENTER_stack into a struct, add a helper to find it, and do all the obvious cleanups this enables. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150606.653244723@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/dumpstack.c') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index ae1ce2e3f132..bbd6d986e2d0 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -45,11 +45,10 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task, bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) { - int cpu = smp_processor_id(); - struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss; + struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id()); - void *begin = &tss->SYSENTER_stack; - void *end = (void *)&tss->SYSENTER_stack + sizeof(tss->SYSENTER_stack); + void *begin = ss; + void *end = ss + 1; if ((void *)stack < begin || (void *)stack >= end) return false; -- cgit v1.2.3 From 4fe2d8b11a370af286287a2661de9d4e6c9a145a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 17:25:07 -0800 Subject: x86/entry: Rename SYSENTER_stack to CPU_ENTRY_AREA_entry_stack If the kernel oopses while on the trampoline stack, it will print "" even if SYSENTER is not involved. That is rather confusing. The "SYSENTER" stack is used for a lot more than SYSENTER now. Give it a better string to display in stack dumps, and rename the kernel code to match. Also move the 32-bit code over to the new naming even though it still uses the entry stack only for SYSENTER. Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel/dumpstack.c') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index bbd6d986e2d0..1dd3f533d78c 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -43,9 +43,9 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task, return true; } -bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) +bool in_entry_stack(unsigned long *stack, struct stack_info *info) { - struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id()); + struct entry_stack *ss = cpu_entry_stack(smp_processor_id()); void *begin = ss; void *end = ss + 1; @@ -53,7 +53,7 @@ bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) if ((void *)stack < begin || (void *)stack >= end) return false; - info->type = STACK_TYPE_SYSENTER; + info->type = STACK_TYPE_ENTRY; info->begin = begin; info->end = end; info->next_sp = NULL; @@ -111,13 +111,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, * - task stack * - interrupt stack * - HW exception stacks (double fault, nmi, debug, mce) - * - SYSENTER stack + * - entry stack * * x86-32 can have up to four stacks: * - task stack * - softirq stack * - hardirq stack - * - SYSENTER stack + * - entry stack */ for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { const char *stack_name; -- cgit v1.2.3 From 92a0f81d89571e3e8759366e050ee05cc545ef99 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Dec 2017 18:51:31 +0100 Subject: x86/cpu_entry_area: Move it out of the fixmap Put the cpu_entry_area into a separate P4D entry. The fixmap gets too big and 0-day already hit a case where the fixmap PTEs were cleared by cleanup_highmap(). Aside of that the fixmap API is a pain as it's all backwards. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel/dumpstack.c') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 1dd3f533d78c..36b17e0febe8 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -18,6 +18,7 @@ #include #include +#include #include #include -- cgit v1.2.3 From 5f26d76c3fd67c48806415ef8b1116c97beff8ba Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 19 Dec 2017 22:33:46 +0100 Subject: x86/dumpstack: Indicate in Oops whether PTI is configured and enabled CONFIG_PAGE_TABLE_ISOLATION is relatively new and intrusive feature that may still have some corner cases which could take some time to manifest and be fixed. It would be useful to have Oops messages indicate whether it was enabled for building the kernel, and whether it was disabled during boot. Example of fully enabled: Oops: 0001 [#1] SMP PTI Example of enabled during build, but disabled during boot: Oops: 0001 [#1] SMP NOPTI We can decide to remove this after the feature has been tested in the field long enough. [ tglx: Made it use boot_cpu_has() as requested by Borislav ] Signed-off-by: Vlastimil Babka Signed-off-by: Thomas Gleixner Reviewed-by: Eduardo Valentin Acked-by: Dave Hansen Cc: Andy Lutomirski Cc: Andy Lutomirsky Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: bpetkov@suse.de Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: jkosina@suse.cz Cc: keescook@google.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/dumpstack.c') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 36b17e0febe8..5fa110699ed2 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -297,11 +297,13 @@ int __die(const char *str, struct pt_regs *regs, long err) unsigned long sp; #endif printk(KERN_DEFAULT - "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter, + "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter, IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", IS_ENABLED(CONFIG_SMP) ? " SMP" : "", debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", - IS_ENABLED(CONFIG_KASAN) ? " KASAN" : ""); + IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "", + IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ? + (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : ""); if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) -- cgit v1.2.3