From 8ee2debce32412118cf8c239e0026ace56ea1425 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 25 Sep 2009 15:20:00 -0700 Subject: x86: Export k8 physical topology To eventually interleave emulated nodes over physical nodes, we need to know the physical topology of the machine without actually registering it. This does the k8 node setup in two parts: detection and registration. NUMA emulation can then used the physical topology detected to setup the address ranges of emulated nodes accordingly. If emulation isn't used, the k8 nodes are registered as normal. Two formals are added to the x86 NUMA setup functions: `acpi' and `k8'. These represent whether ACPI or K8 NUMA has been detected; both cannot be true at the same time. This specifies to the NUMA emulation code whether an underlying physical NUMA topology exists and which interface to use. This patch deals solely with separating the k8 setup path into Northbridge detection and registration steps and leaves the ACPI changes for a subsequent patch. The `acpi' formal is added here, however, to avoid touching all the header files again in the next patch. This approach also ensures emulated nodes will not span physical nodes so the true memory latency is not misrepresented. k8_get_nodes() may now be used to export the k8 physical topology of the machine for NUMA emulation. Signed-off-by: David Rientjes Cc: Andreas Herrmann Cc: Yinghai Lu Cc: Balbir Singh Cc: Ankita Garg Cc: Len Brown LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/setup.c') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e09f0e2c14b5..fda0032c25c6 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -106,6 +106,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 #include #endif @@ -691,6 +692,9 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { void __init setup_arch(char **cmdline_p) { + int acpi = 0; + int k8 = 0; + #ifdef CONFIG_X86_32 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); visws_early_detect(); @@ -937,7 +941,11 @@ void __init setup_arch(char **cmdline_p) acpi_numa_init(); #endif - initmem_init(0, max_pfn); +#ifdef CONFIG_K8_NUMA + k8 = !k8_numa_init(0, max_pfn); +#endif + + initmem_init(0, max_pfn, acpi, k8); #ifdef CONFIG_ACPI_SLEEP /* -- cgit v1.2.3 From 8716273caef7f55f39fe4fc6c69c5f9f197f41f1 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 25 Sep 2009 15:20:04 -0700 Subject: x86: Export srat physical topology This is the counterpart to "x86: export k8 physical topology" for SRAT. It is not as invasive because the acpi code already seperates node setup into detection and registration steps, with the exception of registering e820 active regions in acpi_numa_memory_affinity_init(). This is now moved to acpi_scan_nodes() if NUMA emulation is disabled or deferred. acpi_numa_init() now returns a value which specifies whether an underlying SRAT was located. If so, that topology can be used by the emulation code to interleave emulated nodes over physical nodes or to register the nodes for ACPI. acpi_get_nodes() may now be used to export the srat physical topology of the machine for NUMA emulation. Signed-off-by: David Rientjes Cc: Andreas Herrmann Cc: Yinghai Lu Cc: Balbir Singh Cc: Ankita Garg Cc: Len Brown LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/setup.c') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index fda0032c25c6..f89141982702 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -938,11 +938,12 @@ void __init setup_arch(char **cmdline_p) /* * Parse SRAT to discover nodes. */ - acpi_numa_init(); + acpi = acpi_numa_init(); #endif #ifdef CONFIG_K8_NUMA - k8 = !k8_numa_init(0, max_pfn); + if (!acpi) + k8 = !k8_numa_init(0, max_pfn); #endif initmem_init(0, max_pfn, acpi, k8); -- cgit v1.2.3 From a2202aa29289db64ca7988b12343158b67b27f10 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 10 Nov 2009 09:38:24 +0800 Subject: x86: Under BIOS control, restore AP's APIC_LVTTHMR to the BSP value On platforms where the BIOS handles the thermal monitor interrupt, APIC_LVTTHMR on each logical CPU is programmed to generate a SMI and OS must not touch it. Unfortunately AP bringup sequence using INIT-SIPI-SIPI clears all the LVT entries except the mask bit. Essentially this results in all LVT entries including the thermal monitoring interrupt set to masked (clearing the bios programmed value for APIC_LVTTHMR). And this leads to kernel take over the thermal monitoring interrupt on AP's but not on BSP (leaving the bios programmed value only on BSP). As a result of this, we have seen system hangs when the thermal monitoring interrupt is generated. Fix this by reading the initial value of thermal LVT entry on BSP and if bios has taken over the control, then program the same value on all AP's and leave the thermal monitoring interrupt control on all the logical cpu's to the bios. Signed-off-by: Yong Wang Reviewed-by: Suresh Siddha Cc: Borislav Petkov Cc: Arjan van de Ven LKML-Reference: <20091110013824.GA24940@ywang-moblin2.bj.intel.com> Signed-off-by: Ingo Molnar Cc: stable@kernel.org --- arch/x86/kernel/setup.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel/setup.c') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e09f0e2c14b5..179c1f2aa457 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -109,6 +109,7 @@ #ifdef CONFIG_X86_64 #include #endif +#include /* * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. @@ -1024,6 +1025,8 @@ void __init setup_arch(char **cmdline_p) #endif #endif x86_init.oem.banner(); + + mcheck_init(); } #ifdef CONFIG_X86_32 -- cgit v1.2.3 From 196cf0d67acad70ebb2572da489d5cc7066cdd05 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 10 Nov 2009 18:27:23 -0800 Subject: x86: Make sure wakeup trampoline code is below 1MB Instead of using bootmem, try find_e820_area()/reserve_early(), and call acpi_reserve_memory() early, to allocate the wakeup trampoline code area below 1M. This is more reliable, and it also removes a dependency on bootmem. -v2: change function name to acpi_reserve_wakeup_memory(), as suggested by Rafael. Signed-off-by: Yinghai Lu Acked-by: H. Peter Anvin Acked-by: Rafael J. Wysocki Cc: pm list Cc: Len Brown Cc: Linus Torvalds LKML-Reference: <4AFA210B.3020207@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/setup.c') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f89141982702..0a6e94ab8339 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -897,6 +897,13 @@ void __init setup_arch(char **cmdline_p) reserve_brk(); +#ifdef CONFIG_ACPI_SLEEP + /* + * Reserve low memory region for sleep support. + * even before init_memory_mapping + */ + acpi_reserve_wakeup_memory(); +#endif init_gbpages(); /* max_pfn_mapped is updated here */ @@ -948,12 +955,6 @@ void __init setup_arch(char **cmdline_p) initmem_init(0, max_pfn, acpi, k8); -#ifdef CONFIG_ACPI_SLEEP - /* - * Reserve low memory region for sleep support. - */ - acpi_reserve_bootmem(); -#endif /* * Find and reserve possible boot-time SMP configuration: */ -- cgit v1.2.3 From 4763ed4d45522b876c97e1f7f4b659d211f75571 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 13 Nov 2009 15:28:16 -0800 Subject: x86, mm: Clean up and simplify NX enablement The 32- and 64-bit code used very different mechanisms for enabling NX, but even the 32-bit code was enabling NX in head_32.S if it is available. Furthermore, we had a bewildering collection of tests for the available of NX. This patch: a) merges the 32-bit set_nx() and the 64-bit check_efer() function into a single x86_configure_nx() function. EFER control is left to the head code. b) eliminates the nx_enabled variable entirely. Things that need to test for NX enablement can verify __supported_pte_mask directly, and cpu_has_nx gives the supported status of NX. Signed-off-by: H. Peter Anvin Cc: Tejun Heo Cc: Brian Gerst Cc: Yinghai Lu Cc: Pekka Enberg Cc: Vegard Nossum Cc: Jeremy Fitzhardinge Cc: Chris Wright LKML-Reference: <1258154897-6770-5-git-send-email-hpa@zytor.com> Acked-by: Kees Cook --- arch/x86/kernel/setup.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/setup.c') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0a6e94ab8339..23b7f46bf843 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -787,21 +787,17 @@ void __init setup_arch(char **cmdline_p) strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; -#ifdef CONFIG_X86_64 /* * Must call this twice: Once just to detect whether hardware doesn't * support NX (so that the early EHCI debug console setup can safely * call set_fixmap(), and then again after parsing early parameters to * honor the respective command line option. */ - check_efer(); -#endif + x86_configure_nx(); parse_early_param(); -#ifdef CONFIG_X86_64 - check_efer(); -#endif + x86_configure_nx(); /* Must be before kernel pagetables are setup */ vmi_activate(); -- cgit v1.2.3 From 4b0f3b81eb33ef18283aa71440cccfede1753ae0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 13 Nov 2009 15:28:17 -0800 Subject: x86, mm: Report state of NX protections during boot It is possible for x86_64 systems to lack the NX bit either due to the hardware lacking support or the BIOS having turned off the CPU capability, so NX status should be reported. Additionally, anyone booting NX-capable CPUs in 32bit mode without PAE will lack NX functionality, so this change provides feedback for that case as well. Signed-off-by: Kees Cook Signed-off-by: H. Peter Anvin LKML-Reference: <1258154897-6770-6-git-send-email-hpa@zytor.com> --- arch/x86/kernel/setup.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel/setup.c') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 23b7f46bf843..d2043a00abc1 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -788,16 +788,17 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = command_line; /* - * Must call this twice: Once just to detect whether hardware doesn't - * support NX (so that the early EHCI debug console setup can safely - * call set_fixmap(), and then again after parsing early parameters to - * honor the respective command line option. + * x86_configure_nx() is called before parse_early_param() to detect + * whether hardware doesn't support NX (so that the early EHCI debug + * console setup can safely call set_fixmap()). It may then be called + * again from within noexec_setup() during parsing early parameters + * to honor the respective command line option. */ x86_configure_nx(); parse_early_param(); - x86_configure_nx(); + x86_report_nx(); /* Must be before kernel pagetables are setup */ vmi_activate(); -- cgit v1.2.3 From 44280733e71ad15377735b42d8538c109c94d7e3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 22 Nov 2009 17:18:49 -0800 Subject: x86: Change crash kernel to reserve via reserve_early() use find_e820_area()/reserve_early() instead. -v2: address Eric's request, to restore original semantics. will fail, if the provided address can not be used. Signed-off-by: Yinghai Lu Acked-by: Eric W. Biederman LKML-Reference: <4B09E2F9.7040403@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 57 +++++++++++++------------------------------------ 1 file changed, 15 insertions(+), 42 deletions(-) (limited to 'arch/x86/kernel/setup.c') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d2043a00abc1..e3eae5965e4a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -487,42 +487,11 @@ static void __init reserve_early_setup_data(void) #ifdef CONFIG_KEXEC -/** - * Reserve @size bytes of crashkernel memory at any suitable offset. - * - * @size: Size of the crashkernel memory to reserve. - * Returns the base address on success, and -1ULL on failure. - */ -static -unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) -{ - const unsigned long long alignment = 16<<20; /* 16M */ - unsigned long long start = 0LL; - - while (1) { - int ret; - - start = find_e820_area(start, ULONG_MAX, size, alignment); - if (start == -1ULL) - return start; - - /* try to reserve it */ - ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE); - if (ret >= 0) - return start; - - start += alignment; - } -} - static inline unsigned long long get_total_mem(void) { unsigned long long total; - total = max_low_pfn - min_low_pfn; -#ifdef CONFIG_HIGHMEM - total += highend_pfn - highstart_pfn; -#endif + total = max_pfn - min_low_pfn; return total << PAGE_SHIFT; } @@ -542,21 +511,25 @@ static void __init reserve_crashkernel(void) /* 0 means: find the address automatically */ if (crash_base <= 0) { - crash_base = find_and_reserve_crashkernel(crash_size); + const unsigned long long alignment = 16<<20; /* 16M */ + + crash_base = find_e820_area(alignment, ULONG_MAX, crash_size, + alignment); if (crash_base == -1ULL) { - pr_info("crashkernel reservation failed. " - "No suitable area found.\n"); + pr_info("crashkernel reservation failed - No suitable area found.\n"); return; } } else { - ret = reserve_bootmem_generic(crash_base, crash_size, - BOOTMEM_EXCLUSIVE); - if (ret < 0) { - pr_info("crashkernel reservation failed - " - "memory is in use\n"); + unsigned long long start; + + start = find_e820_area(crash_base, ULONG_MAX, crash_size, + 1<<20); + if (start != crash_base) { + pr_info("crashkernel reservation failed - memory is in use.\n"); return; } } + reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL"); printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " "for crashkernel (System RAM: %ldMB)\n", @@ -927,6 +900,8 @@ void __init setup_arch(char **cmdline_p) reserve_initrd(); + reserve_crashkernel(); + vsmp_init(); io_delay_init(); @@ -957,8 +932,6 @@ void __init setup_arch(char **cmdline_p) */ find_smp_config(); - reserve_crashkernel(); - #ifdef CONFIG_X86_64 /* * dma32_reserve_bootmem() allocates bootmem which may conflict -- cgit v1.2.3 From b24c2a925a9837cccf54d50aeac22ba0cbc15455 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 24 Nov 2009 02:48:18 -0800 Subject: x86: Move find_smp_config() earlier and avoid bootmem usage Move the find_smp_config() call to before bootmem is initialized. Use reserve_early() instead of reserve_bootmem() in it. This simplifies the code, we only need to call find_smp_config() once and can remove the now unneeded reserve parameter from x86_init_mpparse::find_smp_config. We thus also reduce x86's dependency on bootmem allocations. Signed-off-by: Yinghai Lu LKML-Reference: <4B0BB9F2.70907@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel/setup.c') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e3eae5965e4a..cdb6a8a506dd 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -913,6 +913,11 @@ void __init setup_arch(char **cmdline_p) early_acpi_boot_init(); + /* + * Find and reserve possible boot-time SMP configuration: + */ + find_smp_config(); + #ifdef CONFIG_ACPI_NUMA /* * Parse SRAT to discover nodes. @@ -927,11 +932,6 @@ void __init setup_arch(char **cmdline_p) initmem_init(0, max_pfn, acpi, k8); - /* - * Find and reserve possible boot-time SMP configuration: - */ - find_smp_config(); - #ifdef CONFIG_X86_64 /* * dma32_reserve_bootmem() allocates bootmem which may conflict -- cgit v1.2.3 From 9eaa192d8988d621217a9e6071cd403fd6010496 Mon Sep 17 00:00:00 2001 From: "Helight.Xu" Date: Mon, 30 Nov 2009 18:33:51 +0800 Subject: x86: Fix a section mismatch in arch/x86/kernel/setup.c copy_edd() should be __init. warning msg: WARNING: vmlinux.o(.text+0x7759): Section mismatch in reference from the function copy_edd() to the variable .init.data:boot_params The function copy_edd() references the variable __initdata boot_params. This is often because copy_edd lacks a __initdata annotation or the annotation of boot_params is wrong. Signed-off-by: ZhenwenXu LKML-Reference: <4B139F8F.4000907@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/setup.c') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e09f0e2c14b5..e020b2d03b4f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -247,7 +247,7 @@ EXPORT_SYMBOL(edd); * from boot_params into a safe place. * */ -static inline void copy_edd(void) +static inline void __init copy_edd(void) { memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, sizeof(edd.mbr_signature)); @@ -256,7 +256,7 @@ static inline void copy_edd(void) edd.edd_info_nr = boot_params.eddbuf_entries; } #else -static inline void copy_edd(void) +static inline void __init copy_edd(void) { } #endif -- cgit v1.2.3