From b8bcfe997e46150fedcc3f5b26b846400122fdd9 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Feb 2009 23:05:19 -0800 Subject: x86/paravirt: remove lazy mode in interrupts Impact: simplification, robustness Make paravirt_lazy_mode() always return PARAVIRT_LAZY_NONE when in an interrupt. This prevents interrupt code from accidentally inheriting an outer lazy state, and instead does everything synchronously. Outer batched operations are left deferred. Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra Cc: Thomas Gleixner --- arch/x86/mm/fault.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/mm/fault.c') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a03b7279efa0..cfbb4a738011 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -225,12 +225,10 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) if (!pmd_present(*pmd_k)) return NULL; - if (!pmd_present(*pmd)) { + if (!pmd_present(*pmd)) set_pmd(pmd, *pmd_k); - arch_flush_lazy_mmu_mode(); - } else { + else BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); - } return pmd_k; } -- cgit v1.2.3 From a2bcd4731f77cb77ae4b5e4a3d7f5471cf346c33 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 29 Mar 2009 23:47:48 +0200 Subject: x86/mm: further cleanups of fault.c's include file section Impact: cleanup Eliminate more than 20 unnecessary #include lines in fault.c Also fix include file dependency bug in asm/traps.h. (this was masked before, by implicit inclusion) Signed-off-by: Ingo Molnar LKML-Reference: Acked-by: H. Peter Anvin --- arch/x86/mm/fault.c | 44 ++++++++++---------------------------------- 1 file changed, 10 insertions(+), 34 deletions(-) (limited to 'arch/x86/mm/fault.c') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a03b7279efa0..24a36a6426ab 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -3,40 +3,16 @@ * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include +#include /* STACK_END_MAGIC */ +#include /* test_thread_flag(), ... */ +#include /* oops_begin/end, ... */ +#include /* search_exception_table */ +#include /* max_low_pfn */ +#include /* __kprobes, ... */ +#include /* kmmio_handler, ... */ + +#include /* dotraplinkage, ... */ +#include /* pgd_*(), ... */ /* * Page fault error code bits: -- cgit v1.2.3 From 7dd1fcc258b65da718f01e4684a7b9244501a9fb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:33 +0100 Subject: perf_counter: provide pagefault software events We use the generic software counter infrastructure to provide page fault events. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/mm/fault.c') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a03b7279efa0..c8725752b6cd 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -1044,6 +1045,8 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs); + /* * If we're in an interrupt, have no user context or are running * in an atomic region then we must not take the fault: -- cgit v1.2.3 From ac17dc8e58f3069ea895cfff963adf98ff3cf6b2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:34 +0100 Subject: perf_counter: provide major/minor page fault software events Provide separate sw counters for major and minor page faults. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm/fault.c') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c8725752b6cd..f2d3324d9215 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1140,10 +1140,13 @@ good_area: return; } - if (fault & VM_FAULT_MAJOR) + if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - else + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs); + } else { tsk->min_flt++; + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs); + } check_v8086_mode(regs, address, tsk); -- cgit v1.2.3 From 78f13e9525ba777da25c4ddab89f28e9366a8b7c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:33 +0200 Subject: perf_counter: allow for data addresses to be recorded Paul suggested we allow for data addresses to be recorded along with the traditional IPs as power can provide these. For now, only the software pagefault events provide data addresses, but in the future power might as well for some events. x86 doesn't seem capable of providing this atm. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130409.394816925@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm/fault.c') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f2d3324d9215..6f9df2babe48 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1045,7 +1045,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address); /* * If we're in an interrupt, have no user context or are running @@ -1142,10 +1142,12 @@ good_area: if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, + regs, address); } else { tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, + regs, address); } check_v8086_mode(regs, address, tsk); -- cgit v1.2.3 From a454ab3110175d710f4f9a96226a26ce4d5d5de2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 3 May 2009 10:09:03 +0200 Subject: x86, mm: fault.c, use printk_once() in is_errata93() Andrew pointed out that the 'once' variable has a needlessly function-global scope. We can in fact eliminate it completely, via the use of printk_once(). [ Impact: cleanup ] Reported-by: Andrew Morton Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/x86/mm/fault.c') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 24a36a6426ab..b9ca6d767dbb 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -514,8 +514,6 @@ bad: static int is_errata93(struct pt_regs *regs, unsigned long address) { #ifdef CONFIG_X86_64 - static int once; - if (address != regs->ip) return 0; @@ -525,10 +523,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) address |= 0xffffffffUL << 32; if ((address >= (u64)_stext && address <= (u64)_etext) || (address >= MODULES_VADDR && address <= MODULES_END)) { - if (!once) { - printk(errata93_warning); - once = 1; - } + printk_once(errata93_warning); regs->ip = address; return 1; } -- cgit v1.2.3 From f4dbfa8f3131a84257223393905f7efad0ca5996 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 14:06:28 +0200 Subject: perf_counter: Standardize event names Pure renames only, to PERF_COUNT_HW_* and PERF_COUNT_SW_*. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm/fault.c') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 6f9df2babe48..5c6d816f30b4 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1045,7 +1045,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address); + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* * If we're in an interrupt, have no user context or are running @@ -1142,11 +1142,11 @@ good_area: if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } -- cgit v1.2.3 From f85612967c93b67b10dd240e3e8bf8a0eee9def7 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 4 Apr 2008 00:53:23 +0200 Subject: x86: add hooks for kmemcheck The hooks that we modify are: - Page fault handler (to handle kmemcheck faults) - Debug exception handler (to hide pages after single-stepping the instruction that caused the page fault) Also redefine memset() to use the optimized version if kmemcheck is enabled. (Thanks to Pekka Enberg for minimizing the impact on the page fault handler.) As kmemcheck doesn't handle MMX/SSE instructions (yet), we also disable the optimized xor code, and rely instead on the generic C implementation in order to avoid false-positive warnings. Signed-off-by: Vegard Nossum [whitespace fixlet] Signed-off-by: Pekka Enberg Signed-off-by: Ingo Molnar [rebased for mainline inclusion] Signed-off-by: Vegard Nossum --- arch/x86/mm/fault.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm/fault.c') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c6acc6326374..baa0e86adfbc 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -14,6 +14,7 @@ #include /* dotraplinkage, ... */ #include /* pgd_*(), ... */ +#include /* kmemcheck_*(), ... */ /* * Page fault error code bits: @@ -956,6 +957,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) /* Get the faulting address: */ address = read_cr2(); + /* + * Detect and handle instructions that would cause a page fault for + * both a tracked kernel page and a userspace page. + */ + if (kmemcheck_active(regs)) + kmemcheck_hide(regs); + if (unlikely(kmmio_fault(regs, address))) return; @@ -973,9 +981,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) * protection error (error_code & 9) == 0. */ if (unlikely(fault_in_kernel_space(address))) { - if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && - vmalloc_fault(address) >= 0) - return; + if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) { + if (vmalloc_fault(address) >= 0) + return; + + if (kmemcheck_fault(regs, address, error_code)) + return; + } /* Can handle a stale RO->RW TLB: */ if (spurious_fault(error_code, address)) -- cgit v1.2.3 From 5dfaf90f8052327c92fbe3c470a2e6634be296c0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 16 Jun 2009 10:23:32 +0200 Subject: x86: mm: Read cr2 before prefetching the mmap_lock Prefetch instructions can generate spurious faults on certain models of older CPUs. The faults themselves cannot be stopped and they can occur pretty much anywhere - so the way we solve them is that we detect certain patterns and ignore the fault. There is one small path of code where we must not take faults though: the #PF handler execution leading up to the reading of the CR2 (the faulting address). If we take a fault there then we destroy the CR2 value (with that of the prefetching instruction's) and possibly mishandle user-space or kernel-space pagefaults. It turns out that in current upstream we do exactly that: prefetchw(&mm->mmap_sem); /* Get the faulting address: */ address = read_cr2(); This is not good. So turn around the order: first read the cr2 then prefetch the lock address. Reading cr2 is plenty fast (2 cycles) so delaying the prefetch by this amount shouldnt be a big issue performance-wise. [ And this might explain a mystery fault.c warning that sometimes occurs on one an old AMD/Semptron based test-system i have - which does have such prefetch problems. ] Cc: Mathieu Desnoyers Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Nick Piggin Cc: Pekka Enberg Cc: Vegard Nossum Cc: Jeremy Fitzhardinge Cc: Hugh Dickins LKML-Reference: <20090616030522.GA22162@Krystal> Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm/fault.c') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c6acc6326374..0482fa649738 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -951,11 +951,11 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) tsk = current; mm = tsk->mm; - prefetchw(&mm->mmap_sem); - /* Get the faulting address: */ address = read_cr2(); + prefetchw(&mm->mmap_sem); + if (unlikely(kmmio_fault(regs, address))) return; -- cgit v1.2.3 From d06063cc221fdefcab86589e79ddfdb7c0e14b63 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 10 Apr 2009 09:01:23 -0700 Subject: Move FAULT_FLAG_xyz into handle_mm_fault() callers This allows the callers to now pass down the full set of FAULT_FLAG_xyz flags to handle_mm_fault(). All callers have been (mechanically) converted to the new calling convention, there's almost certainly room for architectures to clean up their code and then add FAULT_FLAG_RETRY when that support is added. Signed-off-by: Linus Torvalds --- arch/x86/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm/fault.c') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c403526d5d15..78a5fff857be 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1113,7 +1113,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault: */ - fault = handle_mm_fault(mm, vma, address, write); + fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0); if (unlikely(fault & VM_FAULT_ERROR)) { mm_fault_error(regs, error_code, address, fault); -- cgit v1.2.3