From 99935a7a59eaca0292c1a5880e10bae03f4a5e3d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 4 Oct 2009 21:54:24 -0700 Subject: x86/PCI: read root resources from IOH on Intel For intel systems with multi IOH, we should read peer root resources directly from PCI config space, and don't trust _CRS. Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- arch/x86/pci/Makefile | 1 + arch/x86/pci/amd_bus.c | 45 ++++++++++-------------- arch/x86/pci/bus_numa.h | 26 ++++++++++++++ arch/x86/pci/intel_bus.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 135 insertions(+), 27 deletions(-) create mode 100644 arch/x86/pci/bus_numa.h create mode 100644 arch/x86/pci/intel_bus.c (limited to 'arch/x86') diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index d49202e740ea..56d917b556c6 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -15,3 +15,4 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-y += common.o early.o obj-y += amd_bus.o +obj-$(CONFIG_X86_64) += intel_bus.o diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 572ee9782f2a..995f36096a42 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -10,6 +10,8 @@ #include #endif +#include "bus_numa.h" + /* * This discovers the pcibus <-> node mapping on AMD K8. * also get peer root bus resource for io,mmio @@ -17,25 +19,9 @@ #ifdef CONFIG_X86_64 -/* - * sub bus (transparent) will use entres from 3 to store extra from root, - * so need to make sure have enought slot there, increase PCI_BUS_NUM_RESOURCES? - */ -#define RES_NUM 16 -struct pci_root_info { - char name[12]; - unsigned int res_num; - struct resource res[RES_NUM]; - int bus_min; - int bus_max; - int node; - int link; -}; - -/* 4 at this time, it may become to 32 */ -#define PCI_ROOT_NR 4 -static int pci_root_num; -static struct pci_root_info pci_root_info[PCI_ROOT_NR]; +int pci_root_num; +struct pci_root_info pci_root_info[PCI_ROOT_NR]; +static int found_all_numa_early; void x86_pci_root_bus_res_quirks(struct pci_bus *b) { @@ -48,8 +34,11 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) b->resource[1] != &iomem_resource) return; - /* if only one root bus, don't need to anything */ - if (pci_root_num < 2) + if (!pci_root_num) + return; + + /* for amd, if only one root bus, don't need to do anything */ + if (pci_root_num < 2 && found_all_numa_early) return; for (i = 0; i < pci_root_num; i++) { @@ -130,12 +119,15 @@ static void __init update_range(struct res_range *range, size_t start, } } -static void __init update_res(struct pci_root_info *info, size_t start, +void __init update_res(struct pci_root_info *info, size_t start, size_t end, unsigned long flags, int merge) { int i; struct resource *res; + if (start > end) + return; + if (!merge) goto addit; @@ -230,7 +222,6 @@ static int __init early_fill_mp_bus_info(void) int j; unsigned bus; unsigned slot; - int found; int node; int link; int def_node; @@ -247,7 +238,7 @@ static int __init early_fill_mp_bus_info(void) if (!early_pci_allowed()) return -1; - found = 0; + found_all_numa_early = 0; for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { u32 id; u16 device; @@ -261,12 +252,12 @@ static int __init early_fill_mp_bus_info(void) device = (id>>16) & 0xffff; if (pci_probes[i].vendor == vendor && pci_probes[i].device == device) { - found = 1; + found_all_numa_early = 1; break; } } - if (!found) + if (!found_all_numa_early) return 0; pci_root_num = 0; @@ -488,7 +479,7 @@ static int __init early_fill_mp_bus_info(void) info = &pci_root_info[i]; res_num = info->res_num; busnum = info->bus_min; - printk(KERN_DEBUG "bus: [%02x,%02x] on node %x link %x\n", + printk(KERN_DEBUG "bus: [%02x, %02x] on node %x link %x\n", info->bus_min, info->bus_max, info->node, info->link); for (j = 0; j < res_num; j++) { res = &info->res[j]; diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h new file mode 100644 index 000000000000..4ff126a3e887 --- /dev/null +++ b/arch/x86/pci/bus_numa.h @@ -0,0 +1,26 @@ +#ifdef CONFIG_X86_64 + +/* + * sub bus (transparent) will use entres from 3 to store extra from + * root, so need to make sure we have enought slot there, Should we + * increase PCI_BUS_NUM_RESOURCES? + */ +#define RES_NUM 16 +struct pci_root_info { + char name[12]; + unsigned int res_num; + struct resource res[RES_NUM]; + int bus_min; + int bus_max; + int node; + int link; +}; + +/* 4 at this time, it may become to 32 */ +#define PCI_ROOT_NR 4 +extern int pci_root_num; +extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; + +extern void update_res(struct pci_root_info *info, size_t start, + size_t end, unsigned long flags, int merge); +#endif diff --git a/arch/x86/pci/intel_bus.c b/arch/x86/pci/intel_bus.c new file mode 100644 index 000000000000..b7a55dc55d13 --- /dev/null +++ b/arch/x86/pci/intel_bus.c @@ -0,0 +1,90 @@ +/* + * to read io range from IOH pci conf, need to do it after mmconfig is there + */ + +#include +#include +#include +#include +#include + +#include "bus_numa.h" + +static inline void print_ioh_resources(struct pci_root_info *info) +{ + int res_num; + int busnum; + int i; + + printk(KERN_DEBUG "IOH bus: [%02x, %02x]\n", + info->bus_min, info->bus_max); + res_num = info->res_num; + busnum = info->bus_min; + for (i = 0; i < res_num; i++) { + struct resource *res; + + res = &info->res[i]; + printk(KERN_DEBUG "IOH bus: %02x index %x %s: [%llx, %llx]\n", + busnum, i, + (res->flags & IORESOURCE_IO) ? "io port" : + "mmio", + res->start, res->end); + } +} + +#define IOH_LIO 0x108 +#define IOH_LMMIOL 0x10c +#define IOH_LMMIOH 0x110 +#define IOH_LMMIOH_BASEU 0x114 +#define IOH_LMMIOH_LIMITU 0x118 +#define IOH_LCFGBUS 0x11c + +static void __devinit pci_root_bus_res(struct pci_dev *dev) +{ + u16 word; + u32 dword; + struct pci_root_info *info; + u16 io_base, io_end; + u32 mmiol_base, mmiol_end; + u64 mmioh_base, mmioh_end; + int bus_base, bus_end; + + if (pci_root_num >= PCI_ROOT_NR) { + printk(KERN_DEBUG "intel_bus.c: PCI_ROOT_NR is too small\n"); + return; + } + + info = &pci_root_info[pci_root_num]; + pci_root_num++; + + pci_read_config_word(dev, IOH_LCFGBUS, &word); + bus_base = (word & 0xff); + bus_end = (word & 0xff00) >> 8; + sprintf(info->name, "PCI Bus #%02x", bus_base); + info->bus_min = bus_base; + info->bus_max = bus_end; + + pci_read_config_word(dev, IOH_LIO, &word); + io_base = (word & 0xf0) << (12 - 4); + io_end = (word & 0xf000) | 0xfff; + update_res(info, io_base, io_end, IORESOURCE_IO, 0); + + pci_read_config_dword(dev, IOH_LMMIOL, &dword); + mmiol_base = (dword & 0xff00) << (24 - 8); + mmiol_end = (dword & 0xff000000) | 0xffffff; + update_res(info, mmiol_base, mmiol_end, IORESOURCE_MEM, 0); + + pci_read_config_dword(dev, IOH_LMMIOH, &dword); + mmioh_base = ((u64)(dword & 0xfc00)) << (26 - 10); + mmioh_end = ((u64)(dword & 0xfc000000) | 0x3ffffff); + pci_read_config_dword(dev, IOH_LMMIOH_BASEU, &dword); + mmioh_base |= ((u64)(dword & 0x7ffff)) << 32; + pci_read_config_dword(dev, IOH_LMMIOH_LIMITU, &dword); + mmioh_end |= ((u64)(dword & 0x7ffff)) << 32; + update_res(info, mmioh_base, mmioh_end, IORESOURCE_MEM, 0); + + print_ioh_resources(info); +} + +/* intel IOH */ +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, pci_root_bus_res); -- cgit v1.2.3 From ac1aa47b131416a6ff37eb1005a0a1d2541aad6c Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 26 Oct 2009 13:20:44 -0700 Subject: PCI: determine CLS more intelligently Till now, CLS has been determined either by arch code or as L1_CACHE_BYTES. Only x86 and ia64 set CLS explicitly and x86 doesn't always get it right. On most configurations, the chance is that firmware configures the correct value during boot. This patch makes pci_init() determine CLS by looking at what firmware has configured. It scans all devices and if all non-zero values agree, the value is used. If none is configured or there is a disagreement, pci_dfl_cache_line_size is used. arch can set the dfl value (via PCI_CACHE_LINE_BYTES or pci_dfl_cache_line_size) or override the actual one. ia64, x86 and sparc64 updated to set the default cls instead of the actual one. While at it, declare pci_cache_line_size and pci_dfl_cache_line_size in pci.h and drop private declarations from arch code. Signed-off-by: Tejun Heo Acked-by: David Miller Acked-by: Greg KH Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Tony Luck Signed-off-by: Jesse Barnes --- arch/x86/pci/common.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 1331fcf26143..fbeec31316cf 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -410,8 +410,6 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) return bus; } -extern u8 pci_cache_line_size; - int __init pcibios_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -426,11 +424,11 @@ int __init pcibios_init(void) * and P4. It's also good for 386/486s (which actually have 16) * as quite a few PCI devices do not support smaller values. */ - pci_cache_line_size = 32 >> 2; + pci_dfl_cache_line_size = 32 >> 2; if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD) - pci_cache_line_size = 64 >> 2; /* K7 & K8 */ + pci_dfl_cache_line_size = 64 >> 2; /* K7 & K8 */ else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL) - pci_cache_line_size = 128 >> 2; /* P4 */ + pci_dfl_cache_line_size = 128 >> 2; /* P4 */ pcibios_resource_survey(); -- cgit v1.2.3 From 76b1a87b217927f905f4b01c586452b2a1d33913 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 14 Oct 2009 16:31:39 -0400 Subject: x86/PCI: Use generic cacheline sizing instead of per-vendor tests. Instead of the PCI code needing to have code to determine the cacheline size of each processor, use the data the cpu identification code should have already determined during early boot. (The vendor checks are also incomplete, and don't take into account modern CPUs) I've been carrying a variant of this code in Fedora for a while, that prints debug information. There are a number of cases where we are currently setting the PCI cacheline size to 32 bytes, when the CPU cacheline size is 64 bytes. With this patch, we set them both the same. Signed-off-by: Dave Jones Signed-off-by: Jesse Barnes --- arch/x86/pci/common.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index fbeec31316cf..d2552c68e94d 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -420,15 +420,19 @@ int __init pcibios_init(void) } /* - * Assume PCI cacheline size of 32 bytes for all x86s except K7/K8 - * and P4. It's also good for 386/486s (which actually have 16) + * Set PCI cacheline size to that of the CPU if the CPU has reported it. + * (For older CPUs that don't support cpuid, we se it to 32 bytes + * It's also good for 386/486s (which actually have 16) * as quite a few PCI devices do not support smaller values. */ - pci_dfl_cache_line_size = 32 >> 2; - if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD) - pci_dfl_cache_line_size = 64 >> 2; /* K7 & K8 */ - else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL) - pci_dfl_cache_line_size = 128 >> 2; /* P4 */ + if (c->x86_clflush_size > 0) { + pci_dfl_cache_line_size = c->x86_clflush_size >> 2; + printk(KERN_DEBUG "PCI: pci_cache_line_size set to %d bytes\n", + pci_dfl_cache_line_size << 2); + } else { + pci_dfl_cache_line_size = 32 >> 2; + printk(KERN_DEBUG "PCI: Unknown cacheline size. Setting to 32 bytes\n"); + } pcibios_resource_survey(); -- cgit v1.2.3 From 42887b29ced263ec3b8bd26ef157a324789b89d9 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 6 Oct 2009 15:33:49 -0600 Subject: x86/PCI: print resources consistently with %pRt This uses %pRt to print additional resource information (type, size, prefetchability, etc.) consistently. Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 14 +++++++++++--- arch/x86/pci/i386.c | 12 +++++------- 2 files changed, 16 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 1014eb4bfc37..9b3daf976732 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -7,6 +7,7 @@ #include struct pci_root_info { + struct acpi_device *bridge; char *name; unsigned int res_num; struct resource *res; @@ -107,12 +108,18 @@ setup_resource(struct acpi_resource *acpi_res, void *data) res->child = NULL; if (insert_resource(root, res)) { - printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx " - "from %s for %s\n", (unsigned long) res->start, - (unsigned long) res->end, root->name, info->name); + dev_err(&info->bridge->dev, "can't allocate %pRt\n", res); } else { info->bus->resource[info->res_num] = res; info->res_num++; + if (addr.translation_offset) + dev_info(&info->bridge->dev, "host bridge window: %pRt " + "(PCI address [%#llx-%#llx])\n", + res, res->start - addr.translation_offset, + res->end - addr.translation_offset); + else + dev_info(&info->bridge->dev, + "host bridge window: %pRt\n", res); } return AE_OK; } @@ -124,6 +131,7 @@ get_current_resources(struct acpi_device *device, int busnum, struct pci_root_info info; size_t size; + info.bridge = device; info.bus = bus; info.res_num = 0; acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index b22d13b0c71d..a70a85de5e84 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -129,7 +129,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) continue; if (!r->start || pci_claim_resource(dev, idx) < 0) { - dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); + dev_info(&dev->dev, "BAR %d: can't allocate %pRt\n", idx, r); /* * Something is wrong with the region. * Invalidate the resource to prevent @@ -164,12 +164,10 @@ static void __init pcibios_allocate_resources(int pass) else disabled = !(command & PCI_COMMAND_MEMORY); if (pass == disabled) { - dev_dbg(&dev->dev, "resource %#08llx-%#08llx (f=%lx, d=%d, p=%d)\n", - (unsigned long long) r->start, - (unsigned long long) r->end, - r->flags, disabled, pass); + dev_dbg(&dev->dev, "%pRf (d=%d, p=%d)\n", r, + disabled, pass); if (pci_claim_resource(dev, idx) < 0) { - dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); + dev_info(&dev->dev, "BAR %d: can't allocate %pRt\n", idx, r); /* We'll assign a new address later */ r->end -= r->start; r->start = 0; @@ -182,7 +180,7 @@ static void __init pcibios_allocate_resources(int pass) /* Turn the ROM off, leave the resource region, * but keep it unregistered. */ u32 reg; - dev_dbg(&dev->dev, "disabling ROM\n"); + dev_dbg(&dev->dev, "disabling ROM %pRt\n", r); r->flags &= ~IORESOURCE_ROM_ENABLE; pci_read_config_dword(dev, dev->rom_base_reg, ®); -- cgit v1.2.3 From 2992e545ea006992ec9dc91c4fa996ce1e15f921 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 13:21:32 -0800 Subject: x86/PCI/PAT: return EINVAL for pci mmap WC request for !pat_enabled Thomas Schlichter reported: > X.org uses libpciaccess which tries to mmap with write combining enabled via > /sys/bus/pci/devices/*/resource0_wc. Currently, when PAT is not enabled, the > kernel does fall back to uncached mmap. Then libpciaccess thinks it succeeded > mapping with write combining enabled and does not set up suited MTRR entries. > ;-( Instead of silently mapping pci mmap region as UC minus in the case of !pat_enabled and wc request, we can return error. Eric Anholt mentioned that caller (like X) typically follows up with UC minus pci mmap request and if there is a free mtrr slot, caller will manage adding WC mtrr. Jesse Barnes says: > Older versions of libpciaccess will behave better if we do it that way > (iirc it only allocates an MTRR if the resource_wc file doesn't exist or > fails to get mapped). Reported-by: Thomas Schlichter Signed-off-by: Thomas Schlichter Signed-off-by: Suresh Siddha Acked-by: Eric Anholt Acked-by: Jesse Barnes Signed-off-by: Jesse Barnes --- arch/x86/pci/i386.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index a70a85de5e84..52e656f17781 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -280,6 +280,15 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, return -EINVAL; prot = pgprot_val(vma->vm_page_prot); + + /* + * Return error if pat is not enabled and write_combine is requested. + * Caller can followup with UC MINUS request and add a WC mtrr if there + * is a free mtrr slot. + */ + if (!pat_enabled && write_combine) + return -EINVAL; + if (pat_enabled && write_combine) prot |= _PAGE_CACHE_WC; else if (pat_enabled || boot_cpu_data.x86 > 3) -- cgit v1.2.3 From 9a08f7d3506019e3833cd4394ca0d7da0ae3689f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 23 Oct 2009 15:20:33 -0600 Subject: x86/PCI: allow MMCONFIG above 4GB The current whitelist requires a kernel change for every machine that has MMCONFIG regions above 4GB, even if BIOS provides a correct MCFG table. This patch expands the whitelist to include machines with a rev 1 or newer MCFG table and a DMI_BIOS_DATE of 2010 or later. That way, we only need kernel changes for new machines that provide incorrect MCFG tables. Signed-off-by: Bjorn Helgaas CC: Matthew Wilcox CC: John Keller CC: Yinghai Lu CC: Kenji Kaneshige CC: Andi Kleen Acked-by: Ingo Molnar Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 602c172d3bd5..02642773c29d 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -527,18 +528,31 @@ reject: static int __initdata known_bridge; -static int acpi_mcfg_64bit_base_addr __initdata = FALSE; - /* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ struct acpi_mcfg_allocation *pci_mmcfg_config; int pci_mmcfg_config_num; -static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) +static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, + struct acpi_mcfg_allocation *cfg) { + int year; + + if (cfg->address < 0xFFFFFFFF) + return 0; + if (!strcmp(mcfg->header.oem_id, "SGI")) - acpi_mcfg_64bit_base_addr = TRUE; + return 0; - return 0; + if (mcfg->header.revision >= 1) { + if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && + year >= 2010) + return 0; + } + + printk(KERN_ERR PREFIX "MCFG region for %04x:%02x-%02x at %#llx " + "is above 4GB, ignored\n", cfg->pci_segment, + cfg->start_bus_number, cfg->end_bus_number, cfg->address); + return -EINVAL; } static int __init pci_parse_mcfg(struct acpi_table_header *header) @@ -574,13 +588,8 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) memcpy(pci_mmcfg_config, &mcfg[1], config_size); - acpi_mcfg_oem_check(mcfg); - for (i = 0; i < pci_mmcfg_config_num; ++i) { - if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) && - !acpi_mcfg_64bit_base_addr) { - printk(KERN_ERR PREFIX - "MMCONFIG not in low 4GB of memory\n"); + if (acpi_mcfg_check_entry(mcfg, &pci_mmcfg_config[i])) { kfree(pci_mmcfg_config); pci_mmcfg_config_num = 0; return -ENODEV; -- cgit v1.2.3 From 1ccbf5344c3daef046d2323190cc6807c44f1917 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 6 Oct 2009 15:11:14 -0700 Subject: xen: move Xen-testing predicates to common header Move xen_domain and related tests out of asm-x86 to xen/xen.h so they can be included whenever they are necessary. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Jesse Barnes --- arch/x86/include/asm/xen/hypervisor.h | 27 --------------------------- arch/x86/xen/enlighten.c | 1 + 2 files changed, 1 insertion(+), 27 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index d5b7e90c0edf..396ff4cc8ed4 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -37,31 +37,4 @@ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; -enum xen_domain_type { - XEN_NATIVE, /* running on bare hardware */ - XEN_PV_DOMAIN, /* running in a PV domain */ - XEN_HVM_DOMAIN, /* running in a Xen hvm domain */ -}; - -#ifdef CONFIG_XEN -extern enum xen_domain_type xen_domain_type; -#else -#define xen_domain_type XEN_NATIVE -#endif - -#define xen_domain() (xen_domain_type != XEN_NATIVE) -#define xen_pv_domain() (xen_domain() && \ - xen_domain_type == XEN_PV_DOMAIN) -#define xen_hvm_domain() (xen_domain() && \ - xen_domain_type == XEN_HVM_DOMAIN) - -#ifdef CONFIG_XEN_DOM0 -#include - -#define xen_initial_domain() (xen_pv_domain() && \ - xen_start_info->flags & SIF_INITDOMAIN) -#else /* !CONFIG_XEN_DOM0 */ -#define xen_initial_domain() (0) -#endif /* CONFIG_XEN_DOM0 */ - #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 23a4d80fb39e..5bccd706232c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3 From af5a8ee05404112f38fb2904747c688bdc31a746 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 14 Oct 2009 10:27:42 -0600 Subject: x86/PCI: use -DDEBUG when CONFIG_PCI_DEBUG set We use dev_dbg() in arch/x86/pci, but there's no easy way to turn it on. Add -DDEBUG when CONFIG_PCI_DEBUG=y, just like we do in drivers/pci. Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/Makefile | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 56d917b556c6..d8a0a6279a4d 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -16,3 +16,7 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-y += common.o early.o obj-y += amd_bus.o obj-$(CONFIG_X86_64) += intel_bus.o + +ifeq ($(CONFIG_PCI_DEBUG),y) +EXTRA_CFLAGS += -DDEBUG +endif -- cgit v1.2.3 From c7dabef8a2c59e6a3de9d66fc35fb6a43ef7172d Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 27 Oct 2009 13:26:47 -0600 Subject: vsprintf: use %pR, %pr instead of %pRt, %pRf Jesse accidentally applied v1 [1] of the patchset instead of v2 [2]. This is the diff between v1 and v2. The changes in this patch are: - tidied vsprintf stack buffer to shrink and compute size more accurately - use %pR for decoding and %pr for "raw" (with type and flags) instead of adding %pRt and %pRf [1] http://lkml.org/lkml/2009/10/6/491 [2] http://lkml.org/lkml/2009/10/13/441 Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 7 ++++--- arch/x86/pci/i386.c | 11 ++++++----- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 9b3daf976732..6bf8091d2fd5 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -108,18 +108,19 @@ setup_resource(struct acpi_resource *acpi_res, void *data) res->child = NULL; if (insert_resource(root, res)) { - dev_err(&info->bridge->dev, "can't allocate %pRt\n", res); + dev_err(&info->bridge->dev, + "can't allocate host bridge window %pR\n", res); } else { info->bus->resource[info->res_num] = res; info->res_num++; if (addr.translation_offset) - dev_info(&info->bridge->dev, "host bridge window: %pRt " + dev_info(&info->bridge->dev, "host bridge window %pR " "(PCI address [%#llx-%#llx])\n", res, res->start - addr.translation_offset, res->end - addr.translation_offset); else dev_info(&info->bridge->dev, - "host bridge window: %pRt\n", res); + "host bridge window %pR\n", res); } return AE_OK; } diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 52e656f17781..d49d17de7b3f 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -129,7 +129,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) continue; if (!r->start || pci_claim_resource(dev, idx) < 0) { - dev_info(&dev->dev, "BAR %d: can't allocate %pRt\n", idx, r); + dev_info(&dev->dev, "BAR %d: can't allocate %pR\n", idx, r); /* * Something is wrong with the region. * Invalidate the resource to prevent @@ -164,10 +164,11 @@ static void __init pcibios_allocate_resources(int pass) else disabled = !(command & PCI_COMMAND_MEMORY); if (pass == disabled) { - dev_dbg(&dev->dev, "%pRf (d=%d, p=%d)\n", r, - disabled, pass); + dev_dbg(&dev->dev, + "BAR %d: claiming %pr (d=%d, p=%d)\n", + idx, r, disabled, pass); if (pci_claim_resource(dev, idx) < 0) { - dev_info(&dev->dev, "BAR %d: can't allocate %pRt\n", idx, r); + dev_info(&dev->dev, "BAR %d: can't claim %pR\n", idx, r); /* We'll assign a new address later */ r->end -= r->start; r->start = 0; @@ -180,7 +181,7 @@ static void __init pcibios_allocate_resources(int pass) /* Turn the ROM off, leave the resource region, * but keep it unregistered. */ u32 reg; - dev_dbg(&dev->dev, "disabling ROM %pRt\n", r); + dev_dbg(&dev->dev, "disabling ROM %pR\n", r); r->flags &= ~IORESOURCE_ROM_ENABLE; pci_read_config_dword(dev, dev->rom_base_reg, ®); -- cgit v1.2.3 From 2a6bed8301f8b019717504575a3f9c6cce1fe271 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 4 Nov 2009 10:32:47 -0700 Subject: x86/PCI: print domain:bus in conventional format Use the dev_printk-like "%04x:%02x" format for printing PCI bus numbers. Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 6bf8091d2fd5..68b89dc7d761 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -172,8 +172,9 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do #endif if (domain && !pci_domains_supported) { - printk(KERN_WARNING "PCI: Multiple domains not supported " - "(dom %d, bus %d)\n", domain, busnum); + printk(KERN_WARNING "pci_bus %04x:%02x: " + "ignored (multiple domains not supported)\n", + domain, busnum); return NULL; } @@ -197,7 +198,8 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do */ sd = kzalloc(sizeof(*sd), GFP_KERNEL); if (!sd) { - printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum); + printk(KERN_WARNING "pci_bus %04x:%02x: " + "ignored (out of memory)\n", domain, busnum); return NULL; } -- cgit v1.2.3 From 865df576e8fc70daf297b53e61a4fbefc719d065 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 4 Nov 2009 10:32:57 -0700 Subject: PCI: improve discovery/configuration messages This makes PCI resource management messages more consistent and adds a few new messages to aid debugging. Whenever we assign resources to a device, update a BAR, or change a bridge aperture, it's worth noting it. Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/i386.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index d49d17de7b3f..b73c09f45210 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -129,7 +129,9 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) continue; if (!r->start || pci_claim_resource(dev, idx) < 0) { - dev_info(&dev->dev, "BAR %d: can't allocate %pR\n", idx, r); + dev_info(&dev->dev, + "can't reserve window %pR\n", + r); /* * Something is wrong with the region. * Invalidate the resource to prevent @@ -165,10 +167,11 @@ static void __init pcibios_allocate_resources(int pass) disabled = !(command & PCI_COMMAND_MEMORY); if (pass == disabled) { dev_dbg(&dev->dev, - "BAR %d: claiming %pr (d=%d, p=%d)\n", + "BAR %d: reserving %pr (d=%d, p=%d)\n", idx, r, disabled, pass); if (pci_claim_resource(dev, idx) < 0) { - dev_info(&dev->dev, "BAR %d: can't claim %pR\n", idx, r); + dev_info(&dev->dev, + "can't reserve %pR\n", r); /* We'll assign a new address later */ r->end -= r->start; r->start = 0; -- cgit v1.2.3 From f1db6fde09e201218f488d7205a7cd7bc448d496 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 4 Nov 2009 10:39:13 -0700 Subject: x86/PCI: for debuggability, show host bridge windows even when ignoring _CRS We have occasional problems with PCI resource allocation, and sometimes they could be avoided by paying attention to what ACPI tells us about the host bridges. This patch doesn't change the behavior, but it prints window information that should make debugging easier. Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 68b89dc7d761..54db5a04b5e1 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -92,11 +92,12 @@ setup_resource(struct acpi_resource *acpi_res, void *data) start = addr.minimum + addr.translation_offset; end = start + addr.address_length - 1; if (info->res_num >= max_root_bus_resources) { - printk(KERN_WARNING "PCI: Failed to allocate 0x%lx-0x%lx " - "from %s for %s due to _CRS returning more than " - "%d resource descriptors\n", (unsigned long) start, - (unsigned long) end, root->name, info->name, - max_root_bus_resources); + if (pci_probe & PCI_USE__CRS) + printk(KERN_WARNING "PCI: Failed to allocate " + "0x%lx-0x%lx from %s for %s due to _CRS " + "returning more than %d resource descriptors\n", + (unsigned long) start, (unsigned long) end, + root->name, info->name, max_root_bus_resources); return AE_OK; } @@ -107,6 +108,12 @@ setup_resource(struct acpi_resource *acpi_res, void *data) res->end = end; res->child = NULL; + if (!(pci_probe & PCI_USE__CRS)) { + dev_printk(KERN_DEBUG, &info->bridge->dev, + "host bridge window %pR (ignored)\n", res); + return AE_OK; + } + if (insert_resource(root, res)) { dev_err(&info->bridge->dev, "can't allocate host bridge window %pR\n", res); @@ -132,6 +139,11 @@ get_current_resources(struct acpi_device *device, int busnum, struct pci_root_info info; size_t size; + if (!(pci_probe & PCI_USE__CRS)) + dev_info(&device->dev, + "ignoring host bridge windows from ACPI; " + "boot with \"pci=use_crs\" to use them\n"); + info.bridge = device; info.bus = bus; info.res_num = 0; @@ -220,9 +232,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do } else { bus = pci_create_bus(NULL, busnum, &pci_root_ops, sd); if (bus) { - if (pci_probe & PCI_USE__CRS) - get_current_resources(device, busnum, domain, - bus); + get_current_resources(device, busnum, domain, bus); bus->subordinate = pci_scan_child_bus(bus); } } -- cgit v1.2.3 From 03db42adfeeabe856dbb6894dd3aaff55838330a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 4 Nov 2009 10:39:18 -0700 Subject: x86/PCI: fix bogus host bridge window start/end alignment from _CRS PCI device BARs are guaranteed to start and end on at least a four-byte (I/O) or a sixteen-byte (MMIO) boundary because they're aligned on their size and the low BAR bits are reserved. PCI-to-PCI bridge apertures have even larger alignment restrictions. However, some BIOSes (e.g., HP DL360 BIOS P31) report host bridge windows like "[io 0x0000-0x2cfe]". This is wrong because it excludes the last port at 0x2cff: it's impossible for a downstream device to claim 0x2cfe without also claiming 0x2cff. In fact, this BIOS configures a device behind the bridge to "[io 0x2c00-0x2cff]", so we know the window actually does include 0x2cff. This patch rounds the start and end of apertures to the appropriate boundary. I experimentally determined that Windows contains a similar workaround; details here: http://bugzilla.kernel.org/show_bug.cgi?id=14337 Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 54db5a04b5e1..8ddf4f4c7253 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -59,6 +59,30 @@ bus_has_transparent_bridge(struct pci_bus *bus) return false; } +static void +align_resource(struct acpi_device *bridge, struct resource *res) +{ + int align = (res->flags & IORESOURCE_MEM) ? 16 : 4; + + /* + * Host bridge windows are not BARs, but the decoders on the PCI side + * that claim this address space have starting alignment and length + * constraints, so fix any obvious BIOS goofs. + */ + if (res->start & (align - 1)) { + dev_printk(KERN_DEBUG, &bridge->dev, + "host bridge window %pR invalid; " + "aligning start to %d-byte boundary\n", res, align); + res->start &= ~(align - 1); + } + if ((res->end + 1) & (align - 1)) { + dev_printk(KERN_DEBUG, &bridge->dev, + "host bridge window %pR invalid; " + "aligning end to %d-byte boundary\n", res, align); + res->end = roundup(res->end, align) - 1; + } +} + static acpi_status setup_resource(struct acpi_resource *acpi_res, void *data) { @@ -107,6 +131,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data) res->start = start; res->end = end; res->child = NULL; + align_resource(info->bridge, res); if (!(pci_probe & PCI_USE__CRS)) { dev_printk(KERN_DEBUG, &info->bridge->dev, -- cgit v1.2.3 From ea7f1b6ee9dc96c5827b06ba21d7769d553efb7d Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 5 Nov 2009 11:17:11 -0600 Subject: x86/PCI: remove 64-bit division The roundup() caused a build error (undefined reference to `__udivdi3'). We're aligning to power-of-two boundaries, so it's simpler to just use ALIGN() anyway, which avoids the division. Signed-off-by: Bjorn Helgaas Acked-by: Randy Dunlap Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 8ddf4f4c7253..959e548a7039 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -69,17 +69,17 @@ align_resource(struct acpi_device *bridge, struct resource *res) * that claim this address space have starting alignment and length * constraints, so fix any obvious BIOS goofs. */ - if (res->start & (align - 1)) { + if (!IS_ALIGNED(res->start, align)) { dev_printk(KERN_DEBUG, &bridge->dev, "host bridge window %pR invalid; " "aligning start to %d-byte boundary\n", res, align); res->start &= ~(align - 1); } - if ((res->end + 1) & (align - 1)) { + if (!IS_ALIGNED(res->end + 1, align)) { dev_printk(KERN_DEBUG, &bridge->dev, "host bridge window %pR invalid; " "aligning end to %d-byte boundary\n", res, align); - res->end = roundup(res->end, align) - 1; + res->end = ALIGN(res->end, align) - 1; } } -- cgit v1.2.3 From dc186ad741c12ae9ecac8b89e317ef706fdaf8f6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Nov 2009 01:09:48 +0900 Subject: workqueue: Add debugobjects support Add debugobject support to track the life time of work_structs. While at it, remove duplicate definition of INIT_DELAYED_WORK_ON_STACK(). Signed-off-by: Thomas Gleixner Signed-off-by: Tejun Heo --- arch/x86/kernel/smpboot.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 565ebc65920e..ba43dfed353d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -687,7 +687,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), }; - INIT_WORK(&c_idle.work, do_fork_idle); + INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle); alternatives_smp_switch(1); @@ -713,6 +713,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) if (IS_ERR(c_idle.idle)) { printk("failed fork for CPU %d\n", cpu); + destroy_work_on_stack(&c_idle.work); return PTR_ERR(c_idle.idle); } @@ -831,6 +832,7 @@ do_rest: smpboot_restore_warm_reset_vector(); } + destroy_work_on_stack(&c_idle.work); return boot_error; } -- cgit v1.2.3 From b8cbe7e82ec8b55d7bbdde66fc69e788fde00dc6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 3 Nov 2009 14:57:56 +1030 Subject: [CPUFREQ] cpumask: don't put a cpumask on the stack in x86...cpufreq/powernow-k8.c It's still mugging the current process's cpumask, but as comment in 1ff6e97f1d says, it's not a trivial fix. So, at least we can use a cpumask_var_t to do the Wrong Thing the Right Way :) Signed-off-by: Rusty Russell To: cpufreq@vger.kernel.org Cc: Mark Langsdorf Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 3f12dabeab52..f30d25383940 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1118,7 +1118,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) { - cpumask_t oldmask; + cpumask_var_t oldmask; struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); u32 checkfid; u32 checkvid; @@ -1131,9 +1131,13 @@ static int powernowk8_target(struct cpufreq_policy *pol, checkfid = data->currfid; checkvid = data->currvid; - /* only run on specific CPU from here on */ - oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); + /* only run on specific CPU from here on. */ + /* This is poor form: use a workqueue or smp_call_function_single */ + if (!alloc_cpumask_var(&oldmask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_copy(oldmask, tsk_cpumask(current)); + set_cpus_allowed_ptr(current, cpumask_of(pol->cpu)); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1193,7 +1197,8 @@ static int powernowk8_target(struct cpufreq_policy *pol, ret = 0; err_out: - set_cpus_allowed_ptr(current, &oldmask); + set_cpus_allowed_ptr(current, oldmask); + free_cpumask_var(oldmask); return ret; } -- cgit v1.2.3 From db2820dd5445a44b4726f15a2bc89b9ded2503eb Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Sun, 25 Oct 2009 19:45:57 +0100 Subject: [CPUFREQ] powernow-k6: set transition latency value so ondemand governor can be used Set the transition latency to value smaller than CPUFREQ_ETERNAL so governors other than "performance" work (like the "ondemand" one). The value is found in "AMD PowerNow! Technology Platform Design Guide for Embedded Processors" dated December 2000 (AMD doc #24267A). There is the answer to one of FAQs on page 40 which states that suggested complete transition period is 200 us. Tested on K6-2+ CPU with K6-3 core (model 13, stepping 4). Signed-off-by: Krzysztof Helt Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index f10dea409f40..cb01dac267d3 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -164,7 +164,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) } /* cpuinfo and default policy values */ - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cpuinfo.transition_latency = 200000; policy->cur = busfreq * max_multiplier; result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); -- cgit v1.2.3 From 1cce76c2ac60df40b02bf747982fb3f00e68f50a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Nov 2009 14:39:53 -0800 Subject: [CPUFREQ] use an enum for speedstep processor identification The "unsigned int processor" everywhere confused Rusty, leading to breakage when he passed in smp_processor_id(). Signed-off-by: Rusty Russell Acked-by: Dominik Brodowski Signed-off-by: Andrew Morton Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 2 +- arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 6 +++--- arch/x86/kernel/cpu/cpufreq/speedstep-lib.h | 24 ++++++++++++------------ arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | 2 +- 4 files changed, 17 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 3ae5a7a3a500..2ce8e0b5cc54 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -39,7 +39,7 @@ static struct pci_dev *speedstep_chipset_dev; /* speedstep_processor */ -static unsigned int speedstep_processor; +static enum speedstep_processor speedstep_processor; static u32 pmbase; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index f4c290b8482f..ad0083abfa23 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -34,7 +34,7 @@ static int relaxed_check; * GET PROCESSOR CORE SPEED IN KHZ * *********************************************************************/ -static unsigned int pentium3_get_frequency(unsigned int processor) +static unsigned int pentium3_get_frequency(enum speedstep_processor processor) { /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ struct { @@ -227,7 +227,7 @@ static unsigned int pentium4_get_frequency(void) /* Warning: may get called from smp_call_function_single. */ -unsigned int speedstep_get_frequency(unsigned int processor) +unsigned int speedstep_get_frequency(enum speedstep_processor processor) { switch (processor) { case SPEEDSTEP_CPU_PCORE: @@ -380,7 +380,7 @@ EXPORT_SYMBOL_GPL(speedstep_detect_processor); * DETECT SPEEDSTEP SPEEDS * *********************************************************************/ -unsigned int speedstep_get_freqs(unsigned int processor, +unsigned int speedstep_get_freqs(enum speedstep_processor processor, unsigned int *low_speed, unsigned int *high_speed, unsigned int *transition_latency, diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h index 2b6c04e5a304..70d9cea1219d 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h @@ -11,18 +11,18 @@ /* processors */ - -#define SPEEDSTEP_CPU_PIII_C_EARLY 0x00000001 /* Coppermine core */ -#define SPEEDSTEP_CPU_PIII_C 0x00000002 /* Coppermine core */ -#define SPEEDSTEP_CPU_PIII_T 0x00000003 /* Tualatin core */ -#define SPEEDSTEP_CPU_P4M 0x00000004 /* P4-M */ - +enum speedstep_processor { + SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001, /* Coppermine core */ + SPEEDSTEP_CPU_PIII_C = 0x00000002, /* Coppermine core */ + SPEEDSTEP_CPU_PIII_T = 0x00000003, /* Tualatin core */ + SPEEDSTEP_CPU_P4M = 0x00000004, /* P4-M */ /* the following processors are not speedstep-capable and are not auto-detected * in speedstep_detect_processor(). However, their speed can be detected using * the speedstep_get_frequency() call. */ -#define SPEEDSTEP_CPU_PM 0xFFFFFF03 /* Pentium M */ -#define SPEEDSTEP_CPU_P4D 0xFFFFFF04 /* desktop P4 */ -#define SPEEDSTEP_CPU_PCORE 0xFFFFFF05 /* Core */ + SPEEDSTEP_CPU_PM = 0xFFFFFF03, /* Pentium M */ + SPEEDSTEP_CPU_P4D = 0xFFFFFF04, /* desktop P4 */ + SPEEDSTEP_CPU_PCORE = 0xFFFFFF05, /* Core */ +}; /* speedstep states -- only two of them */ @@ -31,10 +31,10 @@ /* detect a speedstep-capable processor */ -extern unsigned int speedstep_detect_processor (void); +extern enum speedstep_processor speedstep_detect_processor(void); /* detect the current speed (in khz) of the processor */ -extern unsigned int speedstep_get_frequency(unsigned int processor); +extern unsigned int speedstep_get_frequency(enum speedstep_processor processor); /* detect the low and high speeds of the processor. The callback @@ -42,7 +42,7 @@ extern unsigned int speedstep_get_frequency(unsigned int processor); * SPEEDSTEP_LOW; the second argument is zero so that no * cpufreq_notify_transition calls are initiated. */ -extern unsigned int speedstep_get_freqs(unsigned int processor, +extern unsigned int speedstep_get_freqs(enum speedstep_processor processor, unsigned int *low_speed, unsigned int *high_speed, unsigned int *transition_latency, diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index befea088e4f5..04d73c114e49 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c @@ -35,7 +35,7 @@ static int smi_cmd; static unsigned int smi_sig; /* info about the processor */ -static unsigned int speedstep_processor; +static enum speedstep_processor speedstep_processor; /* * There are only two frequency states for each processor. Values -- cgit v1.2.3 From e2f74f355e9e2914483db10c05d70e69e0b7ae04 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Thu, 19 Nov 2009 12:31:01 +0100 Subject: [ACPI/CPUFREQ] Introduce bios_limit per cpu cpufreq sysfs interface This interface is mainly intended (and implemented) for ACPI _PPC BIOS frequency limitations, but other cpufreq drivers can also use it for similar use-cases. Why is this needed: Currently it's not obvious why cpufreq got limited. People see cpufreq/scaling_max_freq reduced, but this could have happened by: - any userspace prog writing to scaling_max_freq - thermal limitations - hardware (_PPC in ACPI case) limitiations Therefore export bios_limit (in kHz) to: - Point the user that it's the BIOS (broken or intended) which limits frequency - Export it as a sysfs interface for userspace progs. While this was a rarely used feature on laptops, there will appear more and more server implemenations providing "Green IT" features like allowing the service processor to limit the frequency. People want to know about HW/BIOS frequency limitations. All ACPI P-state driven cpufreq drivers are covered with this patch: - powernow-k8 - powernow-k7 - acpi-cpufreq Tested with a patched DSDT which limits the first two cores (_PPC returns 1) via _PPC, exposed by bios_limit: # echo 2200000 >cpu2/cpufreq/scaling_max_freq # cat cpu*/cpufreq/scaling_max_freq 2600000 2600000 2200000 2200000 # #scaling_max_freq shows general user/thermal/BIOS limitations # cat cpu*/cpufreq/bios_limit 2600000 2600000 2800000 2800000 # #bios_limit only shows the HW/BIOS limitation CC: Pallipadi Venkatesh CC: Len Brown CC: davej@codemonkey.org.uk CC: linux@dominikbrodowski.net Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 17 +++++++++-------- arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 19 +++++++++++-------- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 17 +++++++++-------- 3 files changed, 29 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 8b581d3905cb..d2e7c77c1ea4 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -764,14 +764,15 @@ static struct freq_attr *acpi_cpufreq_attr[] = { }; static struct cpufreq_driver acpi_cpufreq_driver = { - .verify = acpi_cpufreq_verify, - .target = acpi_cpufreq_target, - .init = acpi_cpufreq_cpu_init, - .exit = acpi_cpufreq_cpu_exit, - .resume = acpi_cpufreq_resume, - .name = "acpi-cpufreq", - .owner = THIS_MODULE, - .attr = acpi_cpufreq_attr, + .verify = acpi_cpufreq_verify, + .target = acpi_cpufreq_target, + .bios_limit = acpi_processor_get_bios_limit, + .init = acpi_cpufreq_cpu_init, + .exit = acpi_cpufreq_cpu_exit, + .resume = acpi_cpufreq_resume, + .name = "acpi-cpufreq", + .owner = THIS_MODULE, + .attr = acpi_cpufreq_attr, }; static int __init acpi_cpufreq_init(void) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index d47c775eb0ab..9a97116f89e5 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -714,14 +714,17 @@ static struct freq_attr *powernow_table_attr[] = { }; static struct cpufreq_driver powernow_driver = { - .verify = powernow_verify, - .target = powernow_target, - .get = powernow_get, - .init = powernow_cpu_init, - .exit = powernow_cpu_exit, - .name = "powernow-k7", - .owner = THIS_MODULE, - .attr = powernow_table_attr, + .verify = powernow_verify, + .target = powernow_target, + .get = powernow_get, +#ifdef CONFIG_X86_POWERNOW_K7_ACPI + .bios_limit = acpi_processor_get_bios_limit, +#endif + .init = powernow_cpu_init, + .exit = powernow_cpu_exit, + .name = "powernow-k7", + .owner = THIS_MODULE, + .attr = powernow_table_attr, }; static int __init powernow_init(void) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index f30d25383940..a9df9441a9a2 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1398,14 +1398,15 @@ static struct freq_attr *powernow_k8_attr[] = { }; static struct cpufreq_driver cpufreq_amd64_driver = { - .verify = powernowk8_verify, - .target = powernowk8_target, - .init = powernowk8_cpu_init, - .exit = __devexit_p(powernowk8_cpu_exit), - .get = powernowk8_get, - .name = "powernow-k8", - .owner = THIS_MODULE, - .attr = powernow_k8_attr, + .verify = powernowk8_verify, + .target = powernowk8_target, + .bios_limit = acpi_processor_get_bios_limit, + .init = powernowk8_cpu_init, + .exit = __devexit_p(powernowk8_cpu_exit), + .get = powernowk8_get, + .name = "powernow-k8", + .owner = THIS_MODULE, + .attr = powernow_k8_attr, }; /* driver entry point for init */ -- cgit v1.2.3 From 2ed7a806d864bde5903b73da1c65b0316b21efd3 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 16 Nov 2009 14:21:13 -0700 Subject: x86/PCI: remove early PCI pr_debug statements commit db635adc turned -DDEBUG for x86/pci on when CONFIG_PCI_DEBUG is set. In general, I agree with that change. However, it exposes a bunch of very low level PCI debugging in the early x86 path, such as: 0 reading 2 from a: ffff 1 reading 2 from a: ffff 2 reading 2 from a: ffff 3 reading 2 from a: 300 3 reading 2 from 0: 1002 3 reading 2 from 2: 515e These statements add a lot of noise to the boot and aren't likely to be necessary even when handling random upstream bug reports. [In contrast, statements such as these: pci 0000:02:04.0: found [14e4:164a] class 000200 header type 00 pci 0000:02:04.0: reg 10: [mem 0xf8000000-0xf9ffffff 64bit] pci 0000:02:04.0: reg 30: [mem 0x00000000-0x0001ffff pref] are indeed useful when remote debugging users' machines] Remove the noisy printks and save electrons everywhere. Cc: Bjorn Helgaas Cc: Yinghai Lu Cc: Andi Kleen Cc: Ingo Molnar Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- arch/x86/pci/early.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index aaf26ae58cd5..d1067d539bee 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c @@ -12,8 +12,6 @@ u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset) u32 v; outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); v = inl(0xcfc); - if (v != 0xffffffff) - pr_debug("%x reading 4 from %x: %x\n", slot, offset, v); return v; } @@ -22,7 +20,6 @@ u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset) u8 v; outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); v = inb(0xcfc + (offset&3)); - pr_debug("%x reading 1 from %x: %x\n", slot, offset, v); return v; } @@ -31,28 +28,24 @@ u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset) u16 v; outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); v = inw(0xcfc + (offset&2)); - pr_debug("%x reading 2 from %x: %x\n", slot, offset, v); return v; } void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val) { - pr_debug("%x writing to %x: %x\n", slot, offset, val); outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); outl(val, 0xcfc); } void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val) { - pr_debug("%x writing to %x: %x\n", slot, offset, val); outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); outb(val, 0xcfc + (offset&3)); } void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val) { - pr_debug("%x writing to %x: %x\n", slot, offset, val); outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); outw(val, 0xcfc + (offset&2)); } -- cgit v1.2.3 From 7b7a78594292d540720485544ad1043b71de14e0 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 17 Nov 2009 23:19:53 +0100 Subject: PCI: fix comment typo in bus_numa.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: AndrĂ© Goddard Rosa Signed-off-by: Jiri Kosina Signed-off-by: Jesse Barnes --- arch/x86/pci/bus_numa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h index 4ff126a3e887..730369f392a3 100644 --- a/arch/x86/pci/bus_numa.h +++ b/arch/x86/pci/bus_numa.h @@ -2,7 +2,7 @@ /* * sub bus (transparent) will use entres from 3 to store extra from - * root, so need to make sure we have enought slot there, Should we + * root, so need to make sure we have enough slot there, Should we * increase PCI_BUS_NUM_RESOURCES? */ #define RES_NUM 16 -- cgit v1.2.3 From 67f241f4579651ea4335b58967c8880c0a378249 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 11 Nov 2009 22:27:40 -0800 Subject: x86/pci: seperate x86_pci_rootbus_res_quirks from amd_bus.c Those functions are used by intel_bus.c so seperate them to another file. and make amd_bus a bit smaller. Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- arch/x86/pci/Makefile | 2 +- arch/x86/pci/amd_bus.c | 99 ----------------------------------------------- arch/x86/pci/bus_numa.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/pci/bus_numa.h | 1 + 4 files changed, 103 insertions(+), 100 deletions(-) create mode 100644 arch/x86/pci/bus_numa.c (limited to 'arch/x86') diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index d8a0a6279a4d..564b008a51c7 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -15,7 +15,7 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-y += common.o early.o obj-y += amd_bus.o -obj-$(CONFIG_X86_64) += intel_bus.o +obj-$(CONFIG_X86_64) += bus_numa.o intel_bus.o ifeq ($(CONFIG_PCI_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 995f36096a42..95ecbd495955 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -6,8 +6,6 @@ #ifdef CONFIG_X86_64 #include -#include -#include #endif #include "bus_numa.h" @@ -19,54 +17,6 @@ #ifdef CONFIG_X86_64 -int pci_root_num; -struct pci_root_info pci_root_info[PCI_ROOT_NR]; -static int found_all_numa_early; - -void x86_pci_root_bus_res_quirks(struct pci_bus *b) -{ - int i; - int j; - struct pci_root_info *info; - - /* don't go for it if _CRS is used already */ - if (b->resource[0] != &ioport_resource || - b->resource[1] != &iomem_resource) - return; - - if (!pci_root_num) - return; - - /* for amd, if only one root bus, don't need to do anything */ - if (pci_root_num < 2 && found_all_numa_early) - return; - - for (i = 0; i < pci_root_num; i++) { - if (pci_root_info[i].bus_min == b->number) - break; - } - - if (i == pci_root_num) - return; - - printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n", - b->number); - - info = &pci_root_info[i]; - for (j = 0; j < info->res_num; j++) { - struct resource *res; - struct resource *root; - - res = &info->res[j]; - b->resource[j] = res; - if (res->flags & IORESOURCE_IO) - root = &ioport_resource; - else - root = &iomem_resource; - insert_resource(root, res); - } -} - #define RANGE_NUM 16 struct res_range { @@ -119,55 +69,6 @@ static void __init update_range(struct res_range *range, size_t start, } } -void __init update_res(struct pci_root_info *info, size_t start, - size_t end, unsigned long flags, int merge) -{ - int i; - struct resource *res; - - if (start > end) - return; - - if (!merge) - goto addit; - - /* try to merge it with old one */ - for (i = 0; i < info->res_num; i++) { - size_t final_start, final_end; - size_t common_start, common_end; - - res = &info->res[i]; - if (res->flags != flags) - continue; - - common_start = max((size_t)res->start, start); - common_end = min((size_t)res->end, end); - if (common_start > common_end + 1) - continue; - - final_start = min((size_t)res->start, start); - final_end = max((size_t)res->end, end); - - res->start = final_start; - res->end = final_end; - return; - } - -addit: - - /* need to add that */ - if (info->res_num >= RES_NUM) - return; - - res = &info->res[info->res_num]; - res->name = info->name; - res->flags = flags; - res->start = start; - res->end = end; - res->child = NULL; - info->res_num++; -} - struct pci_hostbridge_probe { u32 bus; u32 slot; diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c new file mode 100644 index 000000000000..145df00e0387 --- /dev/null +++ b/arch/x86/pci/bus_numa.c @@ -0,0 +1,101 @@ +#include +#include + +#include "bus_numa.h" + +int pci_root_num; +struct pci_root_info pci_root_info[PCI_ROOT_NR]; +int found_all_numa_early; + +void x86_pci_root_bus_res_quirks(struct pci_bus *b) +{ + int i; + int j; + struct pci_root_info *info; + + /* don't go for it if _CRS is used already */ + if (b->resource[0] != &ioport_resource || + b->resource[1] != &iomem_resource) + return; + + if (!pci_root_num) + return; + + /* for amd, if only one root bus, don't need to do anything */ + if (pci_root_num < 2 && found_all_numa_early) + return; + + for (i = 0; i < pci_root_num; i++) { + if (pci_root_info[i].bus_min == b->number) + break; + } + + if (i == pci_root_num) + return; + + printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n", + b->number); + + info = &pci_root_info[i]; + for (j = 0; j < info->res_num; j++) { + struct resource *res; + struct resource *root; + + res = &info->res[j]; + b->resource[j] = res; + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + root = &iomem_resource; + insert_resource(root, res); + } +} + +void __init update_res(struct pci_root_info *info, size_t start, + size_t end, unsigned long flags, int merge) +{ + int i; + struct resource *res; + + if (start > end) + return; + + if (!merge) + goto addit; + + /* try to merge it with old one */ + for (i = 0; i < info->res_num; i++) { + size_t final_start, final_end; + size_t common_start, common_end; + + res = &info->res[i]; + if (res->flags != flags) + continue; + + common_start = max((size_t)res->start, start); + common_end = min((size_t)res->end, end); + if (common_start > common_end + 1) + continue; + + final_start = min((size_t)res->start, start); + final_end = max((size_t)res->end, end); + + res->start = final_start; + res->end = final_end; + return; + } + +addit: + + /* need to add that */ + if (info->res_num >= RES_NUM) + return; + + res = &info->res[info->res_num]; + res->name = info->name; + res->flags = flags; + res->start = start; + res->end = end; + res->child = NULL; + info->res_num++; +} diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h index 730369f392a3..adbc23fe82ac 100644 --- a/arch/x86/pci/bus_numa.h +++ b/arch/x86/pci/bus_numa.h @@ -20,6 +20,7 @@ struct pci_root_info { #define PCI_ROOT_NR 4 extern int pci_root_num; extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; +extern int found_all_numa_early; extern void update_res(struct pci_root_info *info, size_t start, size_t end, unsigned long flags, int merge); -- cgit v1.2.3 From 5663b1b963183e98ece3e77e471da833bb5ad2ff Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:33:37 -0700 Subject: x86/PCI: MMCONFIG: remove unused definitions Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 02642773c29d..9bf04bcfb9c2 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -23,10 +23,6 @@ #define PREFIX "PCI: " -/* aperture is up to 256MB but BIOS may reserve less */ -#define MMCONFIG_APER_MIN (2 * 1024*1024) -#define MMCONFIG_APER_MAX (256 * 1024*1024) - /* Indicate if the mmcfg resources have been placed into the resource table. */ static int __initdata pci_mmcfg_resources_inserted; -- cgit v1.2.3 From e823d6ff581c5d1d76aa8c73a202d7d1419d34b8 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:33:42 -0700 Subject: x86/PCI: MMCONFIG: count MCFG structures with local variable Use a local variable, not pci_mmcfg_config_num, to count MCFG entries. No functional change, but simplifies future changes. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 9bf04bcfb9c2..fbadb89c71eb 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -555,7 +555,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) { struct acpi_table_mcfg *mcfg; unsigned long i; - int config_size; + int entries, config_size; if (!header) return -EINVAL; @@ -564,17 +564,18 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) /* how many config structures do we have */ pci_mmcfg_config_num = 0; + entries = 0; i = header->length - sizeof(struct acpi_table_mcfg); while (i >= sizeof(struct acpi_mcfg_allocation)) { - ++pci_mmcfg_config_num; + entries++; i -= sizeof(struct acpi_mcfg_allocation); }; - if (pci_mmcfg_config_num == 0) { + if (entries == 0) { printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); return -ENODEV; } - config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config); + config_size = entries * sizeof(*pci_mmcfg_config); pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL); if (!pci_mmcfg_config) { printk(KERN_WARNING PREFIX @@ -583,8 +584,9 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) } memcpy(pci_mmcfg_config, &mcfg[1], config_size); + pci_mmcfg_config_num = entries; - for (i = 0; i < pci_mmcfg_config_num; ++i) { + for (i = 0; i < entries; i++) { if (acpi_mcfg_check_entry(mcfg, &pci_mmcfg_config[i])) { kfree(pci_mmcfg_config); pci_mmcfg_config_num = 0; -- cgit v1.2.3 From d3578ef7aab5b9bb874d085609b3ed5d9abffc48 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:33:47 -0700 Subject: x86/PCI: MMCONFIG: step through MCFG table, not pci_mmcfg_config[] Step through the ACPI MCFG table, not pci_mmcfg_config[]. No functional change, but simplifies future patches that encapsulate pci_mmcfg_config[]. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index fbadb89c71eb..7a7b6ba3abbb 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -554,6 +554,7 @@ static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, static int __init pci_parse_mcfg(struct acpi_table_header *header) { struct acpi_table_mcfg *mcfg; + struct acpi_mcfg_allocation *cfg_table, *cfg; unsigned long i; int entries, config_size; @@ -586,8 +587,10 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) memcpy(pci_mmcfg_config, &mcfg[1], config_size); pci_mmcfg_config_num = entries; + cfg_table = (struct acpi_mcfg_allocation *) &mcfg[1]; for (i = 0; i < entries; i++) { - if (acpi_mcfg_check_entry(mcfg, &pci_mmcfg_config[i])) { + cfg = &cfg_table[i]; + if (acpi_mcfg_check_entry(mcfg, cfg)) { kfree(pci_mmcfg_config); pci_mmcfg_config_num = 0; return -ENODEV; -- cgit v1.2.3 From 7da7d360ae025158d09aab18d66f5d2fe3c02252 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:33:53 -0700 Subject: x86/PCI: MMCONFIG: centralize MCFG structure management This patch encapsulate pci_mmcfg_config[] updates. All alloc/free is now done in pci_mmconfig_add() and free_all_mcfg(), so all updates to pci_mmcfg_config[] and pci_mmcfg_config_num are in those two functions. This replaces the previous sequence of extend_mmcfg(), fill_one_mmcfg() with the single pci_mmconfig_add() interface. This interface is currently static but will eventually be used in the host bridge hot-add path. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 85 +++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 46 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 7a7b6ba3abbb..62a8ecd96980 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -26,14 +26,24 @@ /* Indicate if the mmcfg resources have been placed into the resource table. */ static int __initdata pci_mmcfg_resources_inserted; -static __init int extend_mmcfg(int num) +static __init void free_all_mmcfg(void) +{ + pci_mmcfg_arch_free(); + pci_mmcfg_config_num = 0; + kfree(pci_mmcfg_config); + pci_mmcfg_config = NULL; +} + +static __init struct acpi_mcfg_allocation *pci_mmconfig_add(int segment, + int start, int end, u64 addr) { struct acpi_mcfg_allocation *new; - int new_num = pci_mmcfg_config_num + num; + int new_num = pci_mmcfg_config_num + 1; + int i = pci_mmcfg_config_num; new = kzalloc(sizeof(pci_mmcfg_config[0]) * new_num, GFP_KERNEL); if (!new) - return -1; + return NULL; if (pci_mmcfg_config) { memcpy(new, pci_mmcfg_config, @@ -42,18 +52,13 @@ static __init int extend_mmcfg(int num) } pci_mmcfg_config = new; - return 0; -} - -static __init void fill_one_mmcfg(u64 addr, int segment, int start, int end) -{ - int i = pci_mmcfg_config_num; - pci_mmcfg_config_num++; pci_mmcfg_config[i].address = addr; pci_mmcfg_config[i].pci_segment = segment; pci_mmcfg_config[i].start_bus_number = start; pci_mmcfg_config[i].end_bus_number = end; + + return &pci_mmcfg_config[i]; } static const char __init *pci_mmcfg_e7520(void) @@ -65,11 +70,9 @@ static const char __init *pci_mmcfg_e7520(void) if (win == 0x0000 || win == 0xf000) return NULL; - if (extend_mmcfg(1) == -1) + if (pci_mmconfig_add(0, 0, 255, win << 16) == NULL) return NULL; - fill_one_mmcfg(win << 16, 0, 0, 255); - return "Intel Corporation E7520 Memory Controller Hub"; } @@ -111,11 +114,9 @@ static const char __init *pci_mmcfg_intel_945(void) if ((pciexbar & mask) >= 0xf0000000U) return NULL; - if (extend_mmcfg(1) == -1) + if (pci_mmconfig_add(0, 0, (len >> 20) - 1, pciexbar & mask) == NULL) return NULL; - fill_one_mmcfg(pciexbar & mask, 0, 0, (len >> 20) - 1); - return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub"; } @@ -124,7 +125,7 @@ static const char __init *pci_mmcfg_amd_fam10h(void) u32 low, high, address; u64 base, msr; int i; - unsigned segnbits = 0, busnbits; + unsigned segnbits = 0, busnbits, end_bus; if (!(pci_probe & PCI_CHECK_ENABLE_AMD_MMCONF)) return NULL; @@ -158,11 +159,13 @@ static const char __init *pci_mmcfg_amd_fam10h(void) busnbits = 8; } - if (extend_mmcfg(1 << segnbits) == -1) - return NULL; - + end_bus = (1 << busnbits) - 1; for (i = 0; i < (1 << segnbits); i++) - fill_one_mmcfg(base + (1<<28) * i, i, 0, (1 << busnbits) - 1); + if (pci_mmconfig_add(i, 0, end_bus, + base + (1<<28) * i) == NULL) { + free_all_mmcfg(); + return NULL; + } return "AMD Family 10h NB"; } @@ -210,16 +213,14 @@ static const char __init *pci_mmcfg_nvidia_mcp55(void) if (!(extcfg & extcfg_enable_mask)) continue; - if (extend_mmcfg(1) == -1) - continue; - size_index = (extcfg & extcfg_size_mask) >> extcfg_size_shift; base = extcfg & extcfg_base_mask[size_index]; /* base could > 4G */ base <<= extcfg_base_lshift; start = (extcfg & extcfg_start_mask) >> extcfg_start_shift; end = start + extcfg_sizebus[size_index] - 1; - fill_one_mmcfg(base, 0, start, end); + if (pci_mmconfig_add(0, start, end, base) == NULL) + continue; mcp55_mmconf_found++; } @@ -303,8 +304,7 @@ static int __init pci_mmcfg_check_hostbridge(void) if (!raw_pci_ops) return 0; - pci_mmcfg_config_num = 0; - pci_mmcfg_config = NULL; + free_all_mmcfg(); for (i = 0; i < ARRAY_SIZE(pci_mmcfg_probes); i++) { bus = pci_mmcfg_probes[i].bus; @@ -516,10 +516,7 @@ static void __init pci_mmcfg_reject_broken(int early) reject: printk(KERN_INFO "PCI: Not using MMCONFIG.\n"); - pci_mmcfg_arch_free(); - kfree(pci_mmcfg_config); - pci_mmcfg_config = NULL; - pci_mmcfg_config_num = 0; + free_all_mmcfg(); } static int __initdata known_bridge; @@ -556,7 +553,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) struct acpi_table_mcfg *mcfg; struct acpi_mcfg_allocation *cfg_table, *cfg; unsigned long i; - int entries, config_size; + int entries; if (!header) return -EINVAL; @@ -564,7 +561,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) mcfg = (struct acpi_table_mcfg *)header; /* how many config structures do we have */ - pci_mmcfg_config_num = 0; + free_all_mmcfg(); entries = 0; i = header->length - sizeof(struct acpi_table_mcfg); while (i >= sizeof(struct acpi_mcfg_allocation)) { @@ -576,25 +573,21 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) return -ENODEV; } - config_size = entries * sizeof(*pci_mmcfg_config); - pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL); - if (!pci_mmcfg_config) { - printk(KERN_WARNING PREFIX - "No memory for MCFG config tables\n"); - return -ENOMEM; - } - - memcpy(pci_mmcfg_config, &mcfg[1], config_size); - pci_mmcfg_config_num = entries; - cfg_table = (struct acpi_mcfg_allocation *) &mcfg[1]; for (i = 0; i < entries; i++) { cfg = &cfg_table[i]; if (acpi_mcfg_check_entry(mcfg, cfg)) { - kfree(pci_mmcfg_config); - pci_mmcfg_config_num = 0; + free_all_mmcfg(); return -ENODEV; } + + if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number, + cfg->end_bus_number, cfg->address) == NULL) { + printk(KERN_WARNING PREFIX + "no memory for MCFG entries\n"); + free_all_mmcfg(); + return -ENOMEM; + } } return 0; -- cgit v1.2.3 From 463a5df175e3ceed684397ee2f8a3eb523d835a0 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:33:58 -0700 Subject: x86/PCI: MMCONFIG: simplify tests for empty pci_mmcfg_config table We never set pci_mmcfg_config unless we increment pci_mmcfg_config_num, so there's no need to test both pci_mmcfg_config_num and pci_mmcfg_config. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 62a8ecd96980..a0cc4d2efb8a 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -472,7 +472,6 @@ static void __init pci_mmcfg_reject_broken(int early) int i; if ((pci_mmcfg_config_num == 0) || - (pci_mmcfg_config == NULL) || (pci_mmcfg_config[0].address == 0)) return; @@ -618,7 +617,6 @@ static void __init __pci_mmcfg_init(int early) pci_mmcfg_reject_broken(early); if ((pci_mmcfg_config_num == 0) || - (pci_mmcfg_config == NULL) || (pci_mmcfg_config[0].address == 0)) return; @@ -652,7 +650,6 @@ static int __init pci_mmcfg_late_insert_resources(void) if ((pci_mmcfg_resources_inserted == 1) || (pci_probe & PCI_PROBE_MMCONF) == 0 || (pci_mmcfg_config_num == 0) || - (pci_mmcfg_config == NULL) || (pci_mmcfg_config[0].address == 0)) return 1; -- cgit v1.2.3 From f7ca69848786bb99fdfafb511791b078c298438e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:03 -0700 Subject: x86/PCI: MMCONFIG: reject MMCONFIG apertures at address zero Since all MMCONFIG regions go through pci_mmconfig_add(), we can test the address once there. If the caller supplies an address of zero, we never insert it in the pci_mmcfg_config[] table, so no need to test it elsewhere. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index a0cc4d2efb8a..067a2cfed15c 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -41,6 +41,9 @@ static __init struct acpi_mcfg_allocation *pci_mmconfig_add(int segment, int new_num = pci_mmcfg_config_num + 1; int i = pci_mmcfg_config_num; + if (addr == 0) + return NULL; + new = kzalloc(sizeof(pci_mmcfg_config[0]) * new_num, GFP_KERNEL); if (!new) return NULL; @@ -471,8 +474,7 @@ static void __init pci_mmcfg_reject_broken(int early) typeof(pci_mmcfg_config[0]) *cfg; int i; - if ((pci_mmcfg_config_num == 0) || - (pci_mmcfg_config[0].address == 0)) + if (pci_mmcfg_config_num == 0) return; for (i = 0; i < pci_mmcfg_config_num; i++) { @@ -616,8 +618,7 @@ static void __init __pci_mmcfg_init(int early) pci_mmcfg_reject_broken(early); - if ((pci_mmcfg_config_num == 0) || - (pci_mmcfg_config[0].address == 0)) + if (pci_mmcfg_config_num == 0) return; if (pci_mmcfg_arch_init()) @@ -649,8 +650,7 @@ static int __init pci_mmcfg_late_insert_resources(void) */ if ((pci_mmcfg_resources_inserted == 1) || (pci_probe & PCI_PROBE_MMCONF) == 0 || - (pci_mmcfg_config_num == 0) || - (pci_mmcfg_config[0].address == 0)) + (pci_mmcfg_config_num == 0)) return 1; /* -- cgit v1.2.3 From df5eb1d67e8074dfbc23cf396c556116728187b3 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:08 -0700 Subject: x86/PCI: MMCONFIG: add PCI_MMCFG_BUS_OFFSET() to factor common expression This factors out the common "bus << 20" expression used when computing the MMCONFIG address. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci_x86.h | 2 ++ arch/x86/pci/mmconfig-shared.c | 16 ++++++++-------- arch/x86/pci/mmconfig_32.c | 2 +- arch/x86/pci/mmconfig_64.c | 15 +++++++-------- 4 files changed, 18 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index b399988eee3a..7d94a235ec82 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -124,6 +124,8 @@ extern void __init pci_mmcfg_arch_free(void); extern struct acpi_mcfg_allocation *pci_mmcfg_config; extern int pci_mmcfg_config_num; +#define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20) + /* * AMD Fam10h CPUs are buggy, and cannot access MMIO config space * on their northbrige except through the * %eax register. As such, you MUST diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 067a2cfed15c..4820f0e8c594 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -355,8 +355,9 @@ static void __init pci_mmcfg_insert_resources(void) snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u [%02x-%02x]", cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number); - res->start = cfg->address + (cfg->start_bus_number << 20); - res->end = res->start + (num_buses << 20) - 1; + res->start = cfg->address + + PCI_MMCFG_BUS_OFFSET(cfg->start_bus_number); + res->end = res->start + PCI_MMCFG_BUS_OFFSET(num_buses) - 1; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; insert_resource(&iomem_resource, res); names += PCI_MMCFG_RESOURCE_NAME_LEN; @@ -478,15 +479,14 @@ static void __init pci_mmcfg_reject_broken(int early) return; for (i = 0; i < pci_mmcfg_config_num; i++) { - int valid = 0; + int num_buses, valid = 0; u64 addr, size; cfg = &pci_mmcfg_config[i]; - addr = cfg->start_bus_number; - addr <<= 20; - addr += cfg->address; - size = cfg->end_bus_number + 1 - cfg->start_bus_number; - size <<= 20; + addr = cfg->address + + PCI_MMCFG_BUS_OFFSET(cfg->start_bus_number); + num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; + size = PCI_MMCFG_BUS_OFFSET(num_buses); printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " "segment %hu buses %u - %u\n", i, (unsigned long)cfg->address, cfg->pci_segment, diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index f10a7e94a84c..8c19df89ad75 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -47,7 +47,7 @@ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) */ static void pci_exp_set_dev_base(unsigned int base, int bus, int devfn) { - u32 dev_base = base | (bus << 20) | (devfn << 12); + u32 dev_base = base | PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12); int cpu = smp_processor_id(); if (dev_base != mmcfg_last_accessed_device || cpu != mmcfg_last_accessed_cpu) { diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 94349f8b2f96..8588711924cc 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -43,7 +43,7 @@ static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned i addr = get_virt(seg, bus); if (!addr) return NULL; - return addr + ((bus << 20) | (devfn << 12)); + return addr + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12)); } static int pci_mmcfg_read(unsigned int seg, unsigned int bus, @@ -113,17 +113,16 @@ static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg) { void __iomem *addr; u64 start, size; + int num_buses; - start = cfg->start_bus_number; - start <<= 20; - start += cfg->address; - size = cfg->end_bus_number + 1 - cfg->start_bus_number; - size <<= 20; + start = cfg->address + PCI_MMCFG_BUS_OFFSET(cfg->start_bus_number); + num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; + size = PCI_MMCFG_BUS_OFFSET(num_buses); addr = ioremap_nocache(start, size); if (addr) { printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n", start, start + size - 1); - addr -= cfg->start_bus_number << 20; + addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus_number); } return addr; } @@ -162,7 +161,7 @@ void __init pci_mmcfg_arch_free(void) for (i = 0; i < pci_mmcfg_config_num; ++i) { if (pci_mmcfg_virt[i].virt) { - iounmap(pci_mmcfg_virt[i].virt + (pci_mmcfg_virt[i].cfg->start_bus_number << 20)); + iounmap(pci_mmcfg_virt[i].virt + PCI_MMCFG_BUS_OFFSET(pci_mmcfg_virt[i].cfg->start_bus_number)); pci_mmcfg_virt[i].virt = NULL; pci_mmcfg_virt[i].cfg = NULL; } -- cgit v1.2.3 From d215a9c8b46e55a1d3bc1cd907c943ef95938a0e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:13 -0700 Subject: x86/PCI: MMCONFIG: use a private structure rather than the ACPI MCFG one This adds a struct pci_mmcfg_region with a little more information than the struct acpi_mcfg_allocation used previously. The acpi_mcfg structure is defined by the spec, so we can't change it. To begin with, struct pci_mmcfg_region is basically the same as the ACPI MCFG version, but future patches will add more information. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci_x86.h | 9 ++++++++- arch/x86/pci/mmconfig-shared.c | 10 +++++----- arch/x86/pci/mmconfig_32.c | 2 +- arch/x86/pci/mmconfig_64.c | 6 +++--- 4 files changed, 17 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 7d94a235ec82..3a2ca5f69521 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -118,10 +118,17 @@ extern int __init pcibios_init(void); /* pci-mmconfig.c */ +struct pci_mmcfg_region { + u64 address; + u16 pci_segment; + u8 start_bus_number; + u8 end_bus_number; +}; + extern int __init pci_mmcfg_arch_init(void); extern void __init pci_mmcfg_arch_free(void); -extern struct acpi_mcfg_allocation *pci_mmcfg_config; +extern struct pci_mmcfg_region *pci_mmcfg_config; extern int pci_mmcfg_config_num; #define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20) diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 4820f0e8c594..5f7afdd1e2d6 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -34,10 +34,10 @@ static __init void free_all_mmcfg(void) pci_mmcfg_config = NULL; } -static __init struct acpi_mcfg_allocation *pci_mmconfig_add(int segment, - int start, int end, u64 addr) +static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, + int end, u64 addr) { - struct acpi_mcfg_allocation *new; + struct pci_mmcfg_region *new; int new_num = pci_mmcfg_config_num + 1; int i = pci_mmcfg_config_num; @@ -349,7 +349,7 @@ static void __init pci_mmcfg_insert_resources(void) names = (void *)&res[pci_mmcfg_config_num]; for (i = 0; i < pci_mmcfg_config_num; i++, res++) { - struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i]; + struct pci_mmcfg_region *cfg = &pci_mmcfg_config[i]; num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; res->name = names; snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, @@ -523,7 +523,7 @@ reject: static int __initdata known_bridge; /* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ -struct acpi_mcfg_allocation *pci_mmcfg_config; +struct pci_mmcfg_region *pci_mmcfg_config; int pci_mmcfg_config_num; static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index 8c19df89ad75..3936eced993c 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -27,7 +27,7 @@ static int mmcfg_last_accessed_cpu; */ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) { - struct acpi_mcfg_allocation *cfg; + struct pci_mmcfg_region *cfg; int cfg_num; for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 8588711924cc..7a6231c3335e 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -14,14 +14,14 @@ /* Static virtual mapping of the MMCONFIG aperture */ struct mmcfg_virt { - struct acpi_mcfg_allocation *cfg; + struct pci_mmcfg_region *cfg; char __iomem *virt; }; static struct mmcfg_virt *pci_mmcfg_virt; static char __iomem *get_virt(unsigned int seg, unsigned bus) { - struct acpi_mcfg_allocation *cfg; + struct pci_mmcfg_region *cfg; int cfg_num; for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { @@ -109,7 +109,7 @@ static struct pci_raw_ops pci_mmcfg = { .write = pci_mmcfg_write, }; -static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg) +static void __iomem * __init mcfg_ioremap(struct pci_mmcfg_region *cfg) { void __iomem *addr; u64 start, size; -- cgit v1.2.3 From d7e6b66fe87c9f42480d73fc314aecaeae84ca6b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:18 -0700 Subject: x86/PCI: MMCONFIG: rename pci_mmcfg_region structure members This only renames the struct pci_mmcfg_region members; no functional change. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci_x86.h | 6 ++--- arch/x86/pci/mmconfig-shared.c | 50 +++++++++++++++++++++--------------------- arch/x86/pci/mmconfig_32.c | 6 ++--- arch/x86/pci/mmconfig_64.c | 16 +++++++------- 4 files changed, 39 insertions(+), 39 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 3a2ca5f69521..a752d618f196 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -120,9 +120,9 @@ extern int __init pcibios_init(void); struct pci_mmcfg_region { u64 address; - u16 pci_segment; - u8 start_bus_number; - u8 end_bus_number; + u16 segment; + u8 start_bus; + u8 end_bus; }; extern int __init pci_mmcfg_arch_init(void); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 5f7afdd1e2d6..5479fbb2d6ab 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -57,9 +57,9 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, pci_mmcfg_config_num++; pci_mmcfg_config[i].address = addr; - pci_mmcfg_config[i].pci_segment = segment; - pci_mmcfg_config[i].start_bus_number = start; - pci_mmcfg_config[i].end_bus_number = end; + pci_mmcfg_config[i].segment = segment; + pci_mmcfg_config[i].start_bus = start; + pci_mmcfg_config[i].end_bus = end; return &pci_mmcfg_config[i]; } @@ -260,8 +260,8 @@ static int __init cmp_mmcfg(const void *x1, const void *x2) const typeof(pci_mmcfg_config[0]) *m2 = x2; int start1, start2; - start1 = m1->start_bus_number; - start2 = m2->start_bus_number; + start1 = m1->start_bus; + start2 = m2->start_bus; return start1 - start2; } @@ -279,8 +279,8 @@ static void __init pci_mmcfg_check_end_bus_number(void) if (pci_mmcfg_config_num > 0) { i = pci_mmcfg_config_num - 1; cfg = &pci_mmcfg_config[i]; - if (cfg->end_bus_number < cfg->start_bus_number) - cfg->end_bus_number = 255; + if (cfg->end_bus < cfg->start_bus) + cfg->end_bus = 255; } /* don't overlap please */ @@ -288,11 +288,11 @@ static void __init pci_mmcfg_check_end_bus_number(void) cfg = &pci_mmcfg_config[i]; cfgx = &pci_mmcfg_config[i+1]; - if (cfg->end_bus_number < cfg->start_bus_number) - cfg->end_bus_number = 255; + if (cfg->end_bus < cfg->start_bus) + cfg->end_bus = 255; - if (cfg->end_bus_number >= cfgx->start_bus_number) - cfg->end_bus_number = cfgx->start_bus_number - 1; + if (cfg->end_bus >= cfgx->start_bus) + cfg->end_bus = cfgx->start_bus - 1; } } @@ -350,13 +350,13 @@ static void __init pci_mmcfg_insert_resources(void) names = (void *)&res[pci_mmcfg_config_num]; for (i = 0; i < pci_mmcfg_config_num; i++, res++) { struct pci_mmcfg_region *cfg = &pci_mmcfg_config[i]; - num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; + num_buses = cfg->end_bus - cfg->start_bus + 1; res->name = names; snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, - "PCI MMCONFIG %u [%02x-%02x]", cfg->pci_segment, - cfg->start_bus_number, cfg->end_bus_number); + "PCI MMCONFIG %u [%02x-%02x]", cfg->segment, + cfg->start_bus, cfg->end_bus); res->start = cfg->address + - PCI_MMCFG_BUS_OFFSET(cfg->start_bus_number); + PCI_MMCFG_BUS_OFFSET(cfg->start_bus); res->end = res->start + PCI_MMCFG_BUS_OFFSET(num_buses) - 1; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; insert_resource(&iomem_resource, res); @@ -457,13 +457,13 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, valid = 1; if (old_size != size) { - /* update end_bus_number */ - cfg->end_bus_number = cfg->start_bus_number + ((size>>20) - 1); + /* update end_bus */ + cfg->end_bus = cfg->start_bus + ((size>>20) - 1); printk(KERN_NOTICE "PCI: updated MCFG configuration %d: base %lx " "segment %hu buses %u - %u\n", - i, (unsigned long)cfg->address, cfg->pci_segment, - (unsigned int)cfg->start_bus_number, - (unsigned int)cfg->end_bus_number); + i, (unsigned long)cfg->address, cfg->segment, + (unsigned int)cfg->start_bus, + (unsigned int)cfg->end_bus); } } @@ -484,14 +484,14 @@ static void __init pci_mmcfg_reject_broken(int early) cfg = &pci_mmcfg_config[i]; addr = cfg->address + - PCI_MMCFG_BUS_OFFSET(cfg->start_bus_number); - num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; + PCI_MMCFG_BUS_OFFSET(cfg->start_bus); + num_buses = cfg->end_bus - cfg->start_bus + 1; size = PCI_MMCFG_BUS_OFFSET(num_buses); printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " "segment %hu buses %u - %u\n", - i, (unsigned long)cfg->address, cfg->pci_segment, - (unsigned int)cfg->start_bus_number, - (unsigned int)cfg->end_bus_number); + i, (unsigned long)cfg->address, cfg->segment, + (unsigned int)cfg->start_bus, + (unsigned int)cfg->end_bus); if (!early && !acpi_disabled) valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0); diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index 3936eced993c..a3cee532c935 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -32,9 +32,9 @@ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { cfg = &pci_mmcfg_config[cfg_num]; - if (cfg->pci_segment == seg && - (cfg->start_bus_number <= bus) && - (cfg->end_bus_number >= bus)) + if (cfg->segment == seg && + (cfg->start_bus <= bus) && + (cfg->end_bus >= bus)) return cfg->address; } diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 7a6231c3335e..fdf08f97131b 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -26,9 +26,9 @@ static char __iomem *get_virt(unsigned int seg, unsigned bus) for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { cfg = pci_mmcfg_virt[cfg_num].cfg; - if (cfg->pci_segment == seg && - (cfg->start_bus_number <= bus) && - (cfg->end_bus_number >= bus)) + if (cfg->segment == seg && + (cfg->start_bus <= bus) && + (cfg->end_bus >= bus)) return pci_mmcfg_virt[cfg_num].virt; } @@ -115,14 +115,14 @@ static void __iomem * __init mcfg_ioremap(struct pci_mmcfg_region *cfg) u64 start, size; int num_buses; - start = cfg->address + PCI_MMCFG_BUS_OFFSET(cfg->start_bus_number); - num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; + start = cfg->address + PCI_MMCFG_BUS_OFFSET(cfg->start_bus); + num_buses = cfg->end_bus - cfg->start_bus + 1; size = PCI_MMCFG_BUS_OFFSET(num_buses); addr = ioremap_nocache(start, size); if (addr) { printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n", start, start + size - 1); - addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus_number); + addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus); } return addr; } @@ -143,7 +143,7 @@ int __init pci_mmcfg_arch_init(void) if (!pci_mmcfg_virt[i].virt) { printk(KERN_ERR "PCI: Cannot map mmconfig aperture for " "segment %d\n", - pci_mmcfg_config[i].pci_segment); + pci_mmcfg_config[i].segment); pci_mmcfg_arch_free(); return 0; } @@ -161,7 +161,7 @@ void __init pci_mmcfg_arch_free(void) for (i = 0; i < pci_mmcfg_config_num; ++i) { if (pci_mmcfg_virt[i].virt) { - iounmap(pci_mmcfg_virt[i].virt + PCI_MMCFG_BUS_OFFSET(pci_mmcfg_virt[i].cfg->start_bus_number)); + iounmap(pci_mmcfg_virt[i].virt + PCI_MMCFG_BUS_OFFSET(pci_mmcfg_virt[i].cfg->start_bus)); pci_mmcfg_virt[i].virt = NULL; pci_mmcfg_virt[i].cfg = NULL; } -- cgit v1.2.3 From 95cf1cf0c5a767feb811dfed298b95b1df8824c7 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:24 -0700 Subject: x86/PCI: MMCONFIG: use pointer to simplify pci_mmcfg_config[] structure access No functional change, but simplifies a future patch to convert the table to a list. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 5479fbb2d6ab..28ac9f58a986 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -54,12 +54,14 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, kfree(pci_mmcfg_config); } pci_mmcfg_config = new; - pci_mmcfg_config_num++; - pci_mmcfg_config[i].address = addr; - pci_mmcfg_config[i].segment = segment; - pci_mmcfg_config[i].start_bus = start; - pci_mmcfg_config[i].end_bus = end; + + new = &pci_mmcfg_config[i]; + + new->address = addr; + new->segment = segment; + new->start_bus = start; + new->end_bus = end; return &pci_mmcfg_config[i]; } -- cgit v1.2.3 From 56ddf4d3cf04e80254d3d721c6bea2f8ec44c41a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:29 -0700 Subject: x86/PCI: MMCONFIG: add resource to struct pci_mmcfg_region This patch adds a resource and corresponding name to the MMCONFIG structure. This makes allocation simpler (we can allocate the resource and name at the same time we allocate the pci_mmcfg_region), and gives us a way to hang onto the resource after inserting it. This will be needed so we can release and free it when hot-removing a host bridge. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci_x86.h | 5 ++++ arch/x86/pci/mmconfig-shared.c | 64 ++++++++++++++++++++++-------------------- 2 files changed, 38 insertions(+), 31 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index a752d618f196..a6d42c10b017 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -118,11 +118,16 @@ extern int __init pcibios_init(void); /* pci-mmconfig.c */ +/* "PCI MMCONFIG %04x [bus %02x-%02x]" */ +#define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2) + struct pci_mmcfg_region { + struct resource res; u64 address; u16 segment; u8 start_bus; u8 end_bus; + char name[PCI_MMCFG_RESOURCE_NAME_LEN]; }; extern int __init pci_mmcfg_arch_init(void); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 28ac9f58a986..ba3aa3697418 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -28,7 +28,15 @@ static int __initdata pci_mmcfg_resources_inserted; static __init void free_all_mmcfg(void) { + int i; + struct pci_mmcfg_region *cfg; + pci_mmcfg_arch_free(); + for (i = 0; i < pci_mmcfg_config_num; i++) { + cfg = &pci_mmcfg_config[i]; + if (cfg->res.parent) + release_resource(&cfg->res); + } pci_mmcfg_config_num = 0; kfree(pci_mmcfg_config); pci_mmcfg_config = NULL; @@ -40,6 +48,8 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, struct pci_mmcfg_region *new; int new_num = pci_mmcfg_config_num + 1; int i = pci_mmcfg_config_num; + int num_buses; + struct resource *res; if (addr == 0) return NULL; @@ -63,6 +73,15 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, new->start_bus = start; new->end_bus = end; + num_buses = end - start + 1; + res = &new->res; + res->start = addr + PCI_MMCFG_BUS_OFFSET(start); + res->end = addr + PCI_MMCFG_BUS_OFFSET(num_buses) - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + snprintf(new->name, PCI_MMCFG_RESOURCE_NAME_LEN, + "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end); + res->name = new->name; + return &pci_mmcfg_config[i]; } @@ -336,33 +355,12 @@ static int __init pci_mmcfg_check_hostbridge(void) static void __init pci_mmcfg_insert_resources(void) { -#define PCI_MMCFG_RESOURCE_NAME_LEN 24 int i; - struct resource *res; - char *names; - unsigned num_buses; - - res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res), - pci_mmcfg_config_num, GFP_KERNEL); - if (!res) { - printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n"); - return; - } + struct pci_mmcfg_region *cfg; - names = (void *)&res[pci_mmcfg_config_num]; - for (i = 0; i < pci_mmcfg_config_num; i++, res++) { - struct pci_mmcfg_region *cfg = &pci_mmcfg_config[i]; - num_buses = cfg->end_bus - cfg->start_bus + 1; - res->name = names; - snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, - "PCI MMCONFIG %u [%02x-%02x]", cfg->segment, - cfg->start_bus, cfg->end_bus); - res->start = cfg->address + - PCI_MMCFG_BUS_OFFSET(cfg->start_bus); - res->end = res->start + PCI_MMCFG_BUS_OFFSET(num_buses) - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - insert_resource(&iomem_resource, res); - names += PCI_MMCFG_RESOURCE_NAME_LEN; + for (i = 0; i < pci_mmcfg_config_num; i++) { + cfg = &pci_mmcfg_config[i]; + insert_resource(&iomem_resource, &cfg->res); } /* Mark that the resources have been inserted. */ @@ -444,7 +442,7 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, typeof(pci_mmcfg_config[0]) *cfg, int with_e820) { u64 old_size = size; - int valid = 0; + int valid = 0, num_buses; while (!is_reserved(addr, addr + size, E820_RESERVED)) { size >>= 1; @@ -461,6 +459,12 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, if (old_size != size) { /* update end_bus */ cfg->end_bus = cfg->start_bus + ((size>>20) - 1); + num_buses = cfg->end_bus - cfg->start_bus + 1; + cfg->res.end = cfg->res.start + + PCI_MMCFG_BUS_OFFSET(num_buses) - 1; + snprintf(cfg->name, PCI_MMCFG_RESOURCE_NAME_LEN, + "PCI MMCONFIG %04x [bus %02x-%02x]", + cfg->segment, cfg->start_bus, cfg->end_bus); printk(KERN_NOTICE "PCI: updated MCFG configuration %d: base %lx " "segment %hu buses %u - %u\n", i, (unsigned long)cfg->address, cfg->segment, @@ -481,14 +485,12 @@ static void __init pci_mmcfg_reject_broken(int early) return; for (i = 0; i < pci_mmcfg_config_num; i++) { - int num_buses, valid = 0; + int valid = 0; u64 addr, size; cfg = &pci_mmcfg_config[i]; - addr = cfg->address + - PCI_MMCFG_BUS_OFFSET(cfg->start_bus); - num_buses = cfg->end_bus - cfg->start_bus + 1; - size = PCI_MMCFG_BUS_OFFSET(num_buses); + addr = cfg->res.start; + size = resource_size(&cfg->res); printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " "segment %hu buses %u - %u\n", i, (unsigned long)cfg->address, cfg->segment, -- cgit v1.2.3 From 2f2a8b9c90279e75f87aaf322a948bdced27e89f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:34 -0700 Subject: x86/PCI: MMCONFIG: trivial is_mmconf_reserved() interface simplification Since pci_mmcfg_region contains the struct resource, no need to pass the pci_mmcfg_region *and* the resource start/size. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index ba3aa3697418..90422b4a7c91 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -438,9 +438,10 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used) typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type); static int __init is_mmconf_reserved(check_reserved_t is_reserved, - u64 addr, u64 size, int i, - typeof(pci_mmcfg_config[0]) *cfg, int with_e820) + int i, typeof(pci_mmcfg_config[0]) *cfg, int with_e820) { + u64 addr = cfg->res.start; + u64 size = resource_size(&cfg->res); u64 old_size = size; int valid = 0, num_buses; @@ -486,11 +487,8 @@ static void __init pci_mmcfg_reject_broken(int early) for (i = 0; i < pci_mmcfg_config_num; i++) { int valid = 0; - u64 addr, size; cfg = &pci_mmcfg_config[i]; - addr = cfg->res.start; - size = resource_size(&cfg->res); printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " "segment %hu buses %u - %u\n", i, (unsigned long)cfg->address, cfg->segment, @@ -498,7 +496,7 @@ static void __init pci_mmcfg_reject_broken(int early) (unsigned int)cfg->end_bus); if (!early && !acpi_disabled) - valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0); + valid = is_mmconf_reserved(is_acpi_reserved, i, cfg, 0); if (valid) continue; @@ -511,7 +509,7 @@ static void __init pci_mmcfg_reject_broken(int early) /* Don't try to do this check unless configuration type 1 is available. how about type 2 ?*/ if (raw_pci_ops) - valid = is_mmconf_reserved(e820_all_mapped, addr, size, i, cfg, 1); + valid = is_mmconf_reserved(e820_all_mapped, i, cfg, 1); if (!valid) goto reject; -- cgit v1.2.3 From 3f0f5503926f7447615f083c2d57545a83b6357c Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:39 -0700 Subject: x86/PCI: MMCONFIG: add virtual address to struct pci_mmcfg_region The virtual address is only used for x86_64, but it's so much simpler to manage it as part of the pci_mmcfg_region that I think it's worth wasting a pointer per MMCONFIG region on x86_32. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci_x86.h | 1 + arch/x86/pci/mmconfig_64.c | 45 +++++++++++++----------------------------- 2 files changed, 15 insertions(+), 31 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index a6d42c10b017..7aa2ed8f25ab 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -124,6 +124,7 @@ extern int __init pcibios_init(void); struct pci_mmcfg_region { struct resource res; u64 address; + char __iomem *virt; u16 segment; u8 start_bus; u8 end_bus; diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index fdf08f97131b..78fa05c6c04d 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -12,24 +12,17 @@ #include #include -/* Static virtual mapping of the MMCONFIG aperture */ -struct mmcfg_virt { - struct pci_mmcfg_region *cfg; - char __iomem *virt; -}; -static struct mmcfg_virt *pci_mmcfg_virt; - static char __iomem *get_virt(unsigned int seg, unsigned bus) { + int i; struct pci_mmcfg_region *cfg; - int cfg_num; - for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { - cfg = pci_mmcfg_virt[cfg_num].cfg; + for (i = 0; i < pci_mmcfg_config_num; ++i) { + cfg = &pci_mmcfg_config[i]; if (cfg->segment == seg && (cfg->start_bus <= bus) && (cfg->end_bus >= bus)) - return pci_mmcfg_virt[cfg_num].virt; + return cfg->virt; } /* Fall back to type 0 */ @@ -130,20 +123,15 @@ static void __iomem * __init mcfg_ioremap(struct pci_mmcfg_region *cfg) int __init pci_mmcfg_arch_init(void) { int i; - pci_mmcfg_virt = kzalloc(sizeof(*pci_mmcfg_virt) * - pci_mmcfg_config_num, GFP_KERNEL); - if (pci_mmcfg_virt == NULL) { - printk(KERN_ERR "PCI: Can not allocate memory for mmconfig structures\n"); - return 0; - } + struct pci_mmcfg_region *cfg; for (i = 0; i < pci_mmcfg_config_num; ++i) { - pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i]; - pci_mmcfg_virt[i].virt = mcfg_ioremap(&pci_mmcfg_config[i]); - if (!pci_mmcfg_virt[i].virt) { + cfg = &pci_mmcfg_config[i]; + cfg->virt = mcfg_ioremap(cfg); + if (!cfg->virt) { printk(KERN_ERR "PCI: Cannot map mmconfig aperture for " "segment %d\n", - pci_mmcfg_config[i].segment); + cfg->segment); pci_mmcfg_arch_free(); return 0; } @@ -155,18 +143,13 @@ int __init pci_mmcfg_arch_init(void) void __init pci_mmcfg_arch_free(void) { int i; - - if (pci_mmcfg_virt == NULL) - return; + struct pci_mmcfg_region *cfg; for (i = 0; i < pci_mmcfg_config_num; ++i) { - if (pci_mmcfg_virt[i].virt) { - iounmap(pci_mmcfg_virt[i].virt + PCI_MMCFG_BUS_OFFSET(pci_mmcfg_virt[i].cfg->start_bus)); - pci_mmcfg_virt[i].virt = NULL; - pci_mmcfg_virt[i].cfg = NULL; + cfg = &pci_mmcfg_config[i]; + if (cfg->virt) { + iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus)); + cfg->virt = NULL; } } - - kfree(pci_mmcfg_virt); - pci_mmcfg_virt = NULL; } -- cgit v1.2.3 From 987c367b4e93be6826394e7c9cc14d28bb5c8810 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:44 -0700 Subject: x86/PCI: MMCONFIG: remove typeof so we can use a list This replaces "typeof(pci_mmcfg_config[0])" with the actual type because I plan to convert pci_mmcfg_config to a list, and then "pci_mmcfg_config[0]" won't mean anything. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 90422b4a7c91..6eeeac0d25f4 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -277,8 +277,8 @@ static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = { static int __init cmp_mmcfg(const void *x1, const void *x2) { - const typeof(pci_mmcfg_config[0]) *m1 = x1; - const typeof(pci_mmcfg_config[0]) *m2 = x2; + const struct pci_mmcfg_region *m1 = x1; + const struct pci_mmcfg_region *m2 = x2; int start1, start2; start1 = m1->start_bus; @@ -290,7 +290,7 @@ static int __init cmp_mmcfg(const void *x1, const void *x2) static void __init pci_mmcfg_check_end_bus_number(void) { int i; - typeof(pci_mmcfg_config[0]) *cfg, *cfgx; + struct pci_mmcfg_region *cfg, *cfgx; /* sort them at first */ sort(pci_mmcfg_config, pci_mmcfg_config_num, @@ -438,7 +438,7 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used) typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type); static int __init is_mmconf_reserved(check_reserved_t is_reserved, - int i, typeof(pci_mmcfg_config[0]) *cfg, int with_e820) + int i, struct pci_mmcfg_region *cfg, int with_e820) { u64 addr = cfg->res.start; u64 size = resource_size(&cfg->res); @@ -479,7 +479,7 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, static void __init pci_mmcfg_reject_broken(int early) { - typeof(pci_mmcfg_config[0]) *cfg; + struct pci_mmcfg_region *cfg; int i; if (pci_mmcfg_config_num == 0) -- cgit v1.2.3 From ff097ddd4aeac790fd51d013c79c2f18ec9a7117 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:49 -0700 Subject: x86/PCI: MMCONFIG: manage pci_mmcfg_region as a list, not a table This changes pci_mmcfg_region from a table to a list, to make it easier to add and remove MMCONFIG regions for PCI host bridge hotplug. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci_x86.h | 4 +- arch/x86/pci/mmconfig-shared.c | 106 ++++++++++++++++------------------------- arch/x86/pci/mmconfig_32.c | 5 +- arch/x86/pci/mmconfig_64.c | 13 ++--- 4 files changed, 47 insertions(+), 81 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 7aa2ed8f25ab..0b7c316a70c3 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -122,6 +122,7 @@ extern int __init pcibios_init(void); #define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2) struct pci_mmcfg_region { + struct list_head list; struct resource res; u64 address; char __iomem *virt; @@ -134,8 +135,7 @@ struct pci_mmcfg_region { extern int __init pci_mmcfg_arch_init(void); extern void __init pci_mmcfg_arch_free(void); -extern struct pci_mmcfg_region *pci_mmcfg_config; -extern int pci_mmcfg_config_num; +extern struct list_head pci_mmcfg_list; #define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20) diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 6eeeac0d25f4..2709aa81801d 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -26,53 +25,58 @@ /* Indicate if the mmcfg resources have been placed into the resource table. */ static int __initdata pci_mmcfg_resources_inserted; +LIST_HEAD(pci_mmcfg_list); + static __init void free_all_mmcfg(void) { - int i; - struct pci_mmcfg_region *cfg; + struct pci_mmcfg_region *cfg, *tmp; pci_mmcfg_arch_free(); - for (i = 0; i < pci_mmcfg_config_num; i++) { - cfg = &pci_mmcfg_config[i]; + list_for_each_entry_safe(cfg, tmp, &pci_mmcfg_list, list) { if (cfg->res.parent) release_resource(&cfg->res); + list_del(&cfg->list); + kfree(cfg); } - pci_mmcfg_config_num = 0; - kfree(pci_mmcfg_config); - pci_mmcfg_config = NULL; +} + +static __init void list_add_sorted(struct pci_mmcfg_region *new) +{ + struct pci_mmcfg_region *cfg; + + /* keep list sorted by segment and starting bus number */ + list_for_each_entry(cfg, &pci_mmcfg_list, list) { + if (cfg->segment > new->segment || + (cfg->segment == new->segment && + cfg->start_bus >= new->start_bus)) { + list_add_tail(&new->list, &cfg->list); + return; + } + } + list_add_tail(&new->list, &pci_mmcfg_list); } static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, int end, u64 addr) { struct pci_mmcfg_region *new; - int new_num = pci_mmcfg_config_num + 1; - int i = pci_mmcfg_config_num; int num_buses; struct resource *res; if (addr == 0) return NULL; - new = kzalloc(sizeof(pci_mmcfg_config[0]) * new_num, GFP_KERNEL); + new = kzalloc(sizeof(*new), GFP_KERNEL); if (!new) return NULL; - if (pci_mmcfg_config) { - memcpy(new, pci_mmcfg_config, - sizeof(pci_mmcfg_config[0]) * new_num); - kfree(pci_mmcfg_config); - } - pci_mmcfg_config = new; - pci_mmcfg_config_num++; - - new = &pci_mmcfg_config[i]; - new->address = addr; new->segment = segment; new->start_bus = start; new->end_bus = end; + list_add_sorted(new); + num_buses = end - start + 1; res = &new->res; res->start = addr + PCI_MMCFG_BUS_OFFSET(start); @@ -82,7 +86,7 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end); res->name = new->name; - return &pci_mmcfg_config[i]; + return new; } static const char __init *pci_mmcfg_e7520(void) @@ -214,7 +218,7 @@ static const char __init *pci_mmcfg_nvidia_mcp55(void) /* * do check if amd fam10h already took over */ - if (!acpi_disabled || pci_mmcfg_config_num || mcp55_checked) + if (!acpi_disabled || !list_empty(&pci_mmcfg_list) || mcp55_checked) return NULL; mcp55_checked = true; @@ -275,44 +279,26 @@ static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = { 0x0369, pci_mmcfg_nvidia_mcp55 }, }; -static int __init cmp_mmcfg(const void *x1, const void *x2) -{ - const struct pci_mmcfg_region *m1 = x1; - const struct pci_mmcfg_region *m2 = x2; - int start1, start2; - - start1 = m1->start_bus; - start2 = m2->start_bus; - - return start1 - start2; -} - static void __init pci_mmcfg_check_end_bus_number(void) { - int i; struct pci_mmcfg_region *cfg, *cfgx; - /* sort them at first */ - sort(pci_mmcfg_config, pci_mmcfg_config_num, - sizeof(pci_mmcfg_config[0]), cmp_mmcfg, NULL); - /* last one*/ - if (pci_mmcfg_config_num > 0) { - i = pci_mmcfg_config_num - 1; - cfg = &pci_mmcfg_config[i]; + cfg = list_entry(pci_mmcfg_list.prev, typeof(*cfg), list); + if (cfg) if (cfg->end_bus < cfg->start_bus) cfg->end_bus = 255; - } - /* don't overlap please */ - for (i = 0; i < pci_mmcfg_config_num - 1; i++) { - cfg = &pci_mmcfg_config[i]; - cfgx = &pci_mmcfg_config[i+1]; + if (list_is_singular(&pci_mmcfg_list)) + return; + /* don't overlap please */ + list_for_each_entry(cfg, &pci_mmcfg_list, list) { if (cfg->end_bus < cfg->start_bus) cfg->end_bus = 255; - if (cfg->end_bus >= cfgx->start_bus) + cfgx = list_entry(cfg->list.next, typeof(*cfg), list); + if (cfg != cfgx && cfg->end_bus >= cfgx->start_bus) cfg->end_bus = cfgx->start_bus - 1; } } @@ -350,18 +336,15 @@ static int __init pci_mmcfg_check_hostbridge(void) /* some end_bus_number is crazy, fix it */ pci_mmcfg_check_end_bus_number(); - return pci_mmcfg_config_num != 0; + return !list_empty(&pci_mmcfg_list); } static void __init pci_mmcfg_insert_resources(void) { - int i; struct pci_mmcfg_region *cfg; - for (i = 0; i < pci_mmcfg_config_num; i++) { - cfg = &pci_mmcfg_config[i]; + list_for_each_entry(cfg, &pci_mmcfg_list, list) insert_resource(&iomem_resource, &cfg->res); - } /* Mark that the resources have been inserted. */ pci_mmcfg_resources_inserted = 1; @@ -482,18 +465,15 @@ static void __init pci_mmcfg_reject_broken(int early) struct pci_mmcfg_region *cfg; int i; - if (pci_mmcfg_config_num == 0) - return; - - for (i = 0; i < pci_mmcfg_config_num; i++) { + list_for_each_entry(cfg, &pci_mmcfg_list, list) { int valid = 0; - cfg = &pci_mmcfg_config[i]; printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " "segment %hu buses %u - %u\n", i, (unsigned long)cfg->address, cfg->segment, (unsigned int)cfg->start_bus, (unsigned int)cfg->end_bus); + i++; if (!early && !acpi_disabled) valid = is_mmconf_reserved(is_acpi_reserved, i, cfg, 0); @@ -524,10 +504,6 @@ reject: static int __initdata known_bridge; -/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ -struct pci_mmcfg_region *pci_mmcfg_config; -int pci_mmcfg_config_num; - static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, struct acpi_mcfg_allocation *cfg) { @@ -620,7 +596,7 @@ static void __init __pci_mmcfg_init(int early) pci_mmcfg_reject_broken(early); - if (pci_mmcfg_config_num == 0) + if (list_empty(&pci_mmcfg_list)) return; if (pci_mmcfg_arch_init()) @@ -652,7 +628,7 @@ static int __init pci_mmcfg_late_insert_resources(void) */ if ((pci_mmcfg_resources_inserted == 1) || (pci_probe & PCI_PROBE_MMCONF) == 0 || - (pci_mmcfg_config_num == 0)) + list_empty(&pci_mmcfg_list)) return 1; /* diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index a3cee532c935..c04523e09649 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -28,15 +28,12 @@ static int mmcfg_last_accessed_cpu; static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) { struct pci_mmcfg_region *cfg; - int cfg_num; - for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { - cfg = &pci_mmcfg_config[cfg_num]; + list_for_each_entry(cfg, &pci_mmcfg_list, list) if (cfg->segment == seg && (cfg->start_bus <= bus) && (cfg->end_bus >= bus)) return cfg->address; - } /* Fall back to type 0 */ return 0; diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 78fa05c6c04d..ed1f479b4d0e 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -14,16 +14,13 @@ static char __iomem *get_virt(unsigned int seg, unsigned bus) { - int i; struct pci_mmcfg_region *cfg; - for (i = 0; i < pci_mmcfg_config_num; ++i) { - cfg = &pci_mmcfg_config[i]; + list_for_each_entry(cfg, &pci_mmcfg_list, list) if (cfg->segment == seg && (cfg->start_bus <= bus) && (cfg->end_bus >= bus)) return cfg->virt; - } /* Fall back to type 0 */ return NULL; @@ -122,11 +119,9 @@ static void __iomem * __init mcfg_ioremap(struct pci_mmcfg_region *cfg) int __init pci_mmcfg_arch_init(void) { - int i; struct pci_mmcfg_region *cfg; - for (i = 0; i < pci_mmcfg_config_num; ++i) { - cfg = &pci_mmcfg_config[i]; + list_for_each_entry(cfg, &pci_mmcfg_list, list) { cfg->virt = mcfg_ioremap(cfg); if (!cfg->virt) { printk(KERN_ERR "PCI: Cannot map mmconfig aperture for " @@ -142,11 +137,9 @@ int __init pci_mmcfg_arch_init(void) void __init pci_mmcfg_arch_free(void) { - int i; struct pci_mmcfg_region *cfg; - for (i = 0; i < pci_mmcfg_config_num; ++i) { - cfg = &pci_mmcfg_config[i]; + list_for_each_entry(cfg, &pci_mmcfg_list, list) { if (cfg->virt) { iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus)); cfg->virt = NULL; -- cgit v1.2.3 From ba2afbabfc44d6322e8607c004f37868ff786cf8 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:54 -0700 Subject: x86/PCI: MMCONFIG: add pci_mmconfig_remove() to remove MMCONFIG region This is only used internally now, but eventually will be used in the hot-remove path to remove the MMCONFIG region associated with a host bridge. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 2709aa81801d..392f8fe16955 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -27,17 +27,21 @@ static int __initdata pci_mmcfg_resources_inserted; LIST_HEAD(pci_mmcfg_list); +static __init void pci_mmconfig_remove(struct pci_mmcfg_region *cfg) +{ + if (cfg->res.parent) + release_resource(&cfg->res); + list_del(&cfg->list); + kfree(cfg); +} + static __init void free_all_mmcfg(void) { struct pci_mmcfg_region *cfg, *tmp; pci_mmcfg_arch_free(); - list_for_each_entry_safe(cfg, tmp, &pci_mmcfg_list, list) { - if (cfg->res.parent) - release_resource(&cfg->res); - list_del(&cfg->list); - kfree(cfg); - } + list_for_each_entry_safe(cfg, tmp, &pci_mmcfg_list, list) + pci_mmconfig_remove(cfg); } static __init void list_add_sorted(struct pci_mmcfg_region *new) -- cgit v1.2.3 From 8c57786ad3d921713c7ad8e44132aa537a1d0fec Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:59 -0700 Subject: x86/PCI: MMCONFIG: clean up printks No functional change; just tidy up printks and make them more consistent with the rest of PCI. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 46 +++++++++++++++++++----------------------- arch/x86/pci/mmconfig_64.c | 12 +++++------ 2 files changed, 26 insertions(+), 32 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 392f8fe16955..71d69b88fa33 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -90,6 +90,10 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end); res->name = new->name; + printk(KERN_INFO PREFIX "MMCONFIG for domain %04x [bus %02x-%02x] at " + "%pR (base %#lx)\n", segment, start, end, &new->res, + (unsigned long) addr); + return new; } @@ -333,7 +337,7 @@ static int __init pci_mmcfg_check_hostbridge(void) name = pci_mmcfg_probes[i].probe(); if (name) - printk(KERN_INFO "PCI: Found %s with MMCONFIG support.\n", + printk(KERN_INFO PREFIX "%s with MMCONFIG support\n", name); } @@ -425,7 +429,7 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used) typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type); static int __init is_mmconf_reserved(check_reserved_t is_reserved, - int i, struct pci_mmcfg_region *cfg, int with_e820) + struct pci_mmcfg_region *cfg, int with_e820) { u64 addr = cfg->res.start; u64 size = resource_size(&cfg->res); @@ -439,9 +443,9 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, } if (size >= (16UL<<20) || size == old_size) { - printk(KERN_NOTICE - "PCI: MCFG area at %Lx reserved in %s\n", - addr, with_e820?"E820":"ACPI motherboard resources"); + printk(KERN_INFO PREFIX "MMCONFIG at %pR reserved in %s\n", + &cfg->res, + with_e820 ? "E820" : "ACPI motherboard resources"); valid = 1; if (old_size != size) { @@ -453,11 +457,11 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, snprintf(cfg->name, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %04x [bus %02x-%02x]", cfg->segment, cfg->start_bus, cfg->end_bus); - printk(KERN_NOTICE "PCI: updated MCFG configuration %d: base %lx " - "segment %hu buses %u - %u\n", - i, (unsigned long)cfg->address, cfg->segment, - (unsigned int)cfg->start_bus, - (unsigned int)cfg->end_bus); + printk(KERN_INFO PREFIX + "MMCONFIG for %04x [bus%02x-%02x] " + "at %pR (base %#lx) (size reduced!)\n", + cfg->segment, cfg->start_bus, cfg->end_bus, + &cfg->res, (unsigned long) cfg->address); } } @@ -467,33 +471,25 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, static void __init pci_mmcfg_reject_broken(int early) { struct pci_mmcfg_region *cfg; - int i; list_for_each_entry(cfg, &pci_mmcfg_list, list) { int valid = 0; - printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " - "segment %hu buses %u - %u\n", - i, (unsigned long)cfg->address, cfg->segment, - (unsigned int)cfg->start_bus, - (unsigned int)cfg->end_bus); - i++; - if (!early && !acpi_disabled) - valid = is_mmconf_reserved(is_acpi_reserved, i, cfg, 0); + valid = is_mmconf_reserved(is_acpi_reserved, cfg, 0); if (valid) continue; if (!early) - printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" - " reserved in ACPI motherboard resources\n", - cfg->address); + printk(KERN_ERR FW_BUG PREFIX + "MMCONFIG at %pR not reserved in " + "ACPI motherboard resources\n", &cfg->res); /* Don't try to do this check unless configuration type 1 is available. how about type 2 ?*/ if (raw_pci_ops) - valid = is_mmconf_reserved(e820_all_mapped, i, cfg, 1); + valid = is_mmconf_reserved(e820_all_mapped, cfg, 1); if (!valid) goto reject; @@ -502,7 +498,7 @@ static void __init pci_mmcfg_reject_broken(int early) return; reject: - printk(KERN_INFO "PCI: Not using MMCONFIG.\n"); + printk(KERN_INFO PREFIX "not using MMCONFIG\n"); free_all_mmcfg(); } @@ -525,7 +521,7 @@ static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, return 0; } - printk(KERN_ERR PREFIX "MCFG region for %04x:%02x-%02x at %#llx " + printk(KERN_ERR PREFIX "MCFG region for %04x [bus %02x-%02x] at %#llx " "is above 4GB, ignored\n", cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number, cfg->address); return -EINVAL; diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index ed1f479b4d0e..cfa6cdb6d262 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -12,6 +12,8 @@ #include #include +#define PREFIX "PCI: " + static char __iomem *get_virt(unsigned int seg, unsigned bus) { struct pci_mmcfg_region *cfg; @@ -109,11 +111,8 @@ static void __iomem * __init mcfg_ioremap(struct pci_mmcfg_region *cfg) num_buses = cfg->end_bus - cfg->start_bus + 1; size = PCI_MMCFG_BUS_OFFSET(num_buses); addr = ioremap_nocache(start, size); - if (addr) { - printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n", - start, start + size - 1); + if (addr) addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus); - } return addr; } @@ -124,9 +123,8 @@ int __init pci_mmcfg_arch_init(void) list_for_each_entry(cfg, &pci_mmcfg_list, list) { cfg->virt = mcfg_ioremap(cfg); if (!cfg->virt) { - printk(KERN_ERR "PCI: Cannot map mmconfig aperture for " - "segment %d\n", - cfg->segment); + printk(KERN_ERR PREFIX "can't map MMCONFIG at %pR\n", + &cfg->res); pci_mmcfg_arch_free(); return 0; } -- cgit v1.2.3 From f6e1d8cc38b3776038fb15d3acc82ed8bb552f82 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:35:04 -0700 Subject: x86/PCI: MMCONFIG: add lookup function This patch factors out the search for an MMCONFIG region, which was previously implemented in both mmconfig_32 and mmconfig_64. No functional change. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci_x86.h | 1 + arch/x86/pci/mmconfig-shared.c | 12 ++++++++++++ arch/x86/pci/mmconfig_32.c | 11 +++-------- arch/x86/pci/mmconfig_64.c | 23 ++++------------------- 4 files changed, 20 insertions(+), 27 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 0b7c316a70c3..b4bf9a942ed0 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -134,6 +134,7 @@ struct pci_mmcfg_region { extern int __init pci_mmcfg_arch_init(void); extern void __init pci_mmcfg_arch_free(void); +extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus); extern struct list_head pci_mmcfg_list; diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 71d69b88fa33..b19d1e54201e 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -97,6 +97,18 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, return new; } +struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus) +{ + struct pci_mmcfg_region *cfg; + + list_for_each_entry(cfg, &pci_mmcfg_list, list) + if (cfg->segment == segment && + cfg->start_bus <= bus && bus <= cfg->end_bus) + return cfg; + + return NULL; +} + static const char __init *pci_mmcfg_e7520(void) { u32 win; diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index c04523e09649..90d5fd476ed4 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -27,15 +27,10 @@ static int mmcfg_last_accessed_cpu; */ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) { - struct pci_mmcfg_region *cfg; + struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); - list_for_each_entry(cfg, &pci_mmcfg_list, list) - if (cfg->segment == seg && - (cfg->start_bus <= bus) && - (cfg->end_bus >= bus)) - return cfg->address; - - /* Fall back to type 0 */ + if (cfg) + return cfg->address; return 0; } diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index cfa6cdb6d262..e783841bd1d7 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -14,28 +14,13 @@ #define PREFIX "PCI: " -static char __iomem *get_virt(unsigned int seg, unsigned bus) -{ - struct pci_mmcfg_region *cfg; - - list_for_each_entry(cfg, &pci_mmcfg_list, list) - if (cfg->segment == seg && - (cfg->start_bus <= bus) && - (cfg->end_bus >= bus)) - return cfg->virt; - - /* Fall back to type 0 */ - return NULL; -} - static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) { - char __iomem *addr; + struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); - addr = get_virt(seg, bus); - if (!addr) - return NULL; - return addr + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12)); + if (cfg && cfg->virt) + return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12)); + return NULL; } static int pci_mmcfg_read(unsigned int seg, unsigned int bus, -- cgit v1.2.3 From 2263576cfc6e8f6ab038126c3254404b9fcb1c33 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Fri, 13 Nov 2009 10:06:08 +0800 Subject: ACPICA: Add post-order callback to acpi_walk_namespace The existing interface only has a pre-order callback. This change adds an additional parameter for a post-order callback which will be more useful for bus scans. ACPICA BZ 779. Also update the external calls to acpi_walk_namespace. http://www.acpica.org/bugzilla/show_bug.cgi?id=779 Signed-off-by: Lin Ming Signed-off-by: Bob Moore Signed-off-by: Len Brown --- arch/x86/kernel/cpu/cpufreq/longhaul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index cabd2fa3fc93..7e7eea4f8261 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -885,7 +885,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) /* Find ACPI data for processor */ acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, &longhaul_walk_callback, + ACPI_UINT32_MAX, &longhaul_walk_callback, NULL, NULL, (void *)&pr); /* Check ACPI support for C3 state */ -- cgit v1.2.3 From be012920ecba161ad20303a3f6d9e96c58cf97c7 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Sat, 21 Nov 2009 08:35:55 +0800 Subject: xen: re-register runstate area earlier on resume. This is necessary to ensure the runstate area is available to xen_sched_clock before any calls to printk which will require it in order to provide a timestamp. I chose to pull the xen_setup_runstate_info out of xen_time_init into the caller in order to maintain parity with calling xen_setup_runstate_info separately from calling xen_time_resume. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel --- arch/x86/xen/enlighten.c | 2 ++ arch/x86/xen/time.c | 5 ++--- arch/x86/xen/xen-ops.h | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index dfbf70e65860..cb61f77e4496 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -148,6 +148,8 @@ void xen_vcpu_restore(void) HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) BUG(); + xen_setup_runstate_info(cpu); + xen_vcpu_setup(cpu); if (other_cpu && diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 0a5aa44299a5..6bbff94328d2 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -100,7 +100,7 @@ bool xen_vcpu_stolen(int vcpu) return per_cpu(runstate, vcpu).state == RUNSTATE_runnable; } -static void setup_runstate_info(int cpu) +void xen_setup_runstate_info(int cpu) { struct vcpu_register_runstate_memory_area area; @@ -442,8 +442,6 @@ void xen_setup_timer(int cpu) evt->cpumask = cpumask_of(cpu); evt->irq = irq; - - setup_runstate_info(cpu); } void xen_teardown_timer(int cpu) @@ -494,6 +492,7 @@ __init void xen_time_init(void) setup_force_cpu_cap(X86_FEATURE_TSC); + xen_setup_runstate_info(cpu); xen_setup_timer(cpu); xen_setup_cpu_clockevents(); } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 355fa6b99c9c..32529326683d 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -41,6 +41,7 @@ void __init xen_build_dynamic_phys_to_machine(void); void xen_init_irq_ops(void); void xen_setup_timer(int cpu); +void xen_setup_runstate_info(int cpu); void xen_teardown_timer(int cpu); cycle_t xen_clocksource_read(void); void xen_setup_cpu_clockevents(void); -- cgit v1.2.3 From 3905bb2aa7bb801b31946b37a4635ebac4009051 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 21 Nov 2009 08:46:29 +0800 Subject: xen: restore runstate_info even if !have_vcpu_info_placement Even if have_vcpu_info_placement is not set, we still need to set up the runstate area on each resumed vcpu. Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel --- arch/x86/xen/enlighten.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index cb61f77e4496..a7b49f99a130 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -138,26 +138,23 @@ static void xen_vcpu_setup(int cpu) */ void xen_vcpu_restore(void) { - if (have_vcpu_info_placement) { - int cpu; + int cpu; - for_each_online_cpu(cpu) { - bool other_cpu = (cpu != smp_processor_id()); + for_each_online_cpu(cpu) { + bool other_cpu = (cpu != smp_processor_id()); - if (other_cpu && - HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) - BUG(); + if (other_cpu && + HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) + BUG(); - xen_setup_runstate_info(cpu); + xen_setup_runstate_info(cpu); + if (have_vcpu_info_placement) xen_vcpu_setup(cpu); - if (other_cpu && - HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) - BUG(); - } - - BUG_ON(!have_vcpu_info_placement); + if (other_cpu && + HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) + BUG(); } } -- cgit v1.2.3 From fa24ba62ea2869308ffc9f0b286ac9650b4ca6cb Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Sat, 21 Nov 2009 11:32:49 +0000 Subject: xen: correctly restore pfn_to_mfn_list_list after resume pvops kernels >= 2.6.30 can currently only be saved and restored once. The second attempt to save results in: ERROR Internal error: Frame# in pfn-to-mfn frame list is not in pseudophys ERROR Internal error: entry 0: p2m_frame_list[0] is 0xf2c2c2c2, max 0x120000 ERROR Internal error: Failed to map/save the p2m frame list I finally narrowed it down to: commit cdaead6b4e657f960d6d6f9f380e7dfeedc6a09b Author: Jeremy Fitzhardinge Date: Fri Feb 27 15:34:59 2009 -0800 xen: split construction of p2m mfn tables from registration Build the p2m_mfn_list_list early with the rest of the p2m table, but register it later when the real shared_info structure is in place. Signed-off-by: Jeremy Fitzhardinge The unforeseen side-effect of this change was to cause the mfn list list to not be rebuilt on resume. Prior to this change it would have been rebuilt via xen_post_suspend() -> xen_setup_shared_info() -> xen_setup_mfn_list_list(). Fix by explicitly calling xen_build_mfn_list_list() from xen_post_suspend(). Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel --- arch/x86/xen/mmu.c | 2 +- arch/x86/xen/suspend.c | 2 ++ arch/x86/xen/xen-ops.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3bf7b1d250ce..bf4cd6bfe959 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -185,7 +185,7 @@ static inline unsigned p2m_index(unsigned long pfn) } /* Build the parallel p2m_top_mfn structures */ -static void __init xen_build_mfn_list_list(void) +void xen_build_mfn_list_list(void) { unsigned pfn, idx; diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 95be7b434724..6343a5d8e93c 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -27,6 +27,8 @@ void xen_pre_suspend(void) void xen_post_suspend(int suspend_cancelled) { + xen_build_mfn_list_list(); + xen_setup_shared_info(); if (suspend_cancelled) { diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 32529326683d..f9153a300bce 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -25,6 +25,7 @@ extern struct shared_info *HYPERVISOR_shared_info; void xen_setup_mfn_list_list(void); void xen_setup_shared_info(void); +void xen_build_mfn_list_list(void); void xen_setup_machphys_mapping(void); pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_ident_map_ISA(void); -- cgit v1.2.3 From f350c7922faad3397c98c81a9e5658f5a1ef0214 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 24 Nov 2009 10:16:23 +0000 Subject: xen: register timer interrupt with IRQF_TIMER Otherwise the timer is disabled by dpm_suspend_noirq() which in turn prevents correct operation of stop_machine on multi-processor systems and breaks suspend. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel --- arch/x86/xen/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 6bbff94328d2..9d1f853120d8 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -434,7 +434,7 @@ void xen_setup_timer(int cpu) name = ""; irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, - IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, + IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER, name, NULL); evt = &per_cpu(xen_clock_events, cpu); -- cgit v1.2.3 From 028896721ac04f6fa0697f3ecac3f98761746363 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 24 Nov 2009 09:32:48 -0800 Subject: xen: register runstate on secondary CPUs The commit "xen: re-register runstate area earlier on resume" caused us to never try and setup the runstate area for secondary CPUs. Ensure that we do this... Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel --- arch/x86/xen/smp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index fe03eeed7b48..360f8d8c19cd 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -295,6 +295,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) (unsigned long)task_stack_page(idle) - KERNEL_STACK_OFFSET + THREAD_SIZE; #endif + xen_setup_runstate_info(cpu); xen_setup_timer(cpu); xen_init_lock_cpu(cpu); -- cgit v1.2.3 From 499d19b82b586aef18727b9ae1437f8f37b66e91 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 24 Nov 2009 09:38:25 -0800 Subject: xen: register runstate info for boot CPU early printk timestamping uses sched_clock, which in turn relies on runstate info under Xen. So make sure we set it up before any printks can be called. Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel --- arch/x86/xen/enlighten.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a7b49f99a130..79f97383cde3 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1181,6 +1181,8 @@ asmlinkage void __init xen_start_kernel(void) xen_raw_console_write("about to get started...\n"); + xen_setup_runstate_info(0); + /* Start the world */ #ifdef CONFIG_X86_32 i386_start_kernel(); -- cgit v1.2.3 From 6aaf5d633bb6cead81b396d861d7bae4b9a0ba7e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 25 Nov 2009 13:15:38 -0800 Subject: xen: use iret for return from 64b kernel to 32b usermode If Xen wants to return to a 32b usermode with sysret it must use the right form. When using VCGF_in_syscall to trigger this, it looks at the code segment and does a 32b sysret if it is FLAT_USER_CS32. However, this is different from __USER32_CS, so it fails to return properly if we use the normal Linux segment. So avoid the whole mess by dropping VCGF_in_syscall and simply use plain iret to return to usermode. Signed-off-by: Jeremy Fitzhardinge Acked-by: Jan Beulich Cc: Stable Kernel --- arch/x86/xen/xen-asm_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 02f496a8dbaa..53adefda4275 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -96,7 +96,7 @@ ENTRY(xen_sysret32) pushq $__USER32_CS pushq %rcx - pushq $VGCF_in_syscall + pushq $0 1: jmp hypercall_iret ENDPATCH(xen_sysret32) RELOC(xen_sysret32, 1b+1) @@ -151,7 +151,7 @@ ENTRY(xen_syscall32_target) ENTRY(xen_sysenter_target) lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax - pushq $VGCF_in_syscall + pushq $0 jmp hypercall_iret ENDPROC(xen_syscall32_target) ENDPROC(xen_sysenter_target) -- cgit v1.2.3 From f6eafe3665bcc374c66775d58312d1c06c55303f Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 25 Nov 2009 14:12:08 +0000 Subject: xen: call clock resume notifier on all CPUs tick_resume() is never called on secondary processors. Presumably this is because they are offlined for suspend on native and so this is normally taken care of in the CPU onlining path. Under Xen we keep all CPUs online over a suspend. This patch papers over the issue for me but I will investigate a more generic, less hacky, way of doing to the same. tick_suspend is also only called on the boot CPU which I presume should be fixed too. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel Cc: Thomas Gleixner --- arch/x86/xen/suspend.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 6343a5d8e93c..987267f79bf5 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -1,4 +1,5 @@ #include +#include #include #include @@ -46,7 +47,19 @@ void xen_post_suspend(int suspend_cancelled) } +static void xen_vcpu_notify_restore(void *data) +{ + unsigned long reason = (unsigned long)data; + + /* Boot processor notified via generic timekeeping_resume() */ + if ( smp_processor_id() == 0) + return; + + clockevents_notify(reason, NULL); +} + void xen_arch_resume(void) { - /* nothing */ + smp_call_function(xen_vcpu_notify_restore, + (void *)CLOCK_EVT_NOTIFY_RESUME, 1); } -- cgit v1.2.3 From af901ca181d92aac3a7dc265144a9081a86d8f39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Goddard=20Rosa?= Date: Sat, 14 Nov 2009 13:09:05 -0200 Subject: tree-wide: fix assorted typos all over the place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That is "success", "unknown", "through", "performance", "[re|un]mapping" , "access", "default", "reasonable", "[con]currently", "temperature" , "channel", "[un]used", "application", "example","hierarchy", "therefore" , "[over|under]flow", "contiguous", "threshold", "enough" and others. Signed-off-by: AndrĂ© Goddard Rosa Signed-off-by: Jiri Kosina --- arch/x86/include/asm/desc_defs.h | 4 ++-- arch/x86/include/asm/mmzone_32.h | 2 +- arch/x86/include/asm/uv/uv_bau.h | 2 +- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/amd_iommu.c | 4 ++-- arch/x86/kernel/cpu/perf_event.c | 2 +- arch/x86/kernel/kprobes.c | 4 ++-- arch/x86/mm/kmmio.c | 4 ++-- 8 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index 9d6684849fd9..278441f39856 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -12,9 +12,9 @@ #include /* - * FIXME: Acessing the desc_struct through its fields is more elegant, + * FIXME: Accessing the desc_struct through its fields is more elegant, * and should be the one valid thing to do. However, a lot of open code - * still touches the a and b acessors, and doing this allow us to do it + * still touches the a and b accessors, and doing this allow us to do it * incrementally. We keep the signature as a struct, rather than an union, * so we can get rid of it transparently in the future -- glommer */ diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index ede6998bd92c..91df7c51806c 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -47,7 +47,7 @@ static inline void resume_map_numa_kva(pgd_t *pgd) {} /* * generic node memory support, the following assumptions apply: * - * 1) memory comes in 64Mb contigious chunks which are either present or not + * 1) memory comes in 64Mb contiguous chunks which are either present or not * 2) we will not have more than 64Gb in total * * for now assume that 64Gb is max amount of RAM for whole system diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 80e2984f521c..b414d2b401f6 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -55,7 +55,7 @@ #define DESC_STATUS_SOURCE_TIMEOUT 3 /* - * source side threshholds at which message retries print a warning + * source side thresholds at which message retries print a warning */ #define SOURCE_TIMEOUT_LIMIT 20 #define DESTINATION_TIMEOUT_LIMIT 20 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 67e929b89875..1c2c4838d35c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1122,7 +1122,7 @@ static int __init acpi_parse_madt_ioapic_entries(void) if (!acpi_sci_override_gsi) acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0); - /* Fill in identity legacy mapings where no override */ + /* Fill in identity legacy mappings where no override */ mp_config_acpi_legacy_irqs(); count = diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0285521e0a99..42ac5e000995 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1637,7 +1637,7 @@ retry: goto out; /* - * aperture was sucessfully enlarged by 128 MB, try + * aperture was successfully enlarged by 128 MB, try * allocation again */ goto retry; @@ -2396,7 +2396,7 @@ int __init amd_iommu_init_passthrough(void) struct pci_dev *dev = NULL; u16 devid, devid2; - /* allocate passthroug domain */ + /* allocate passthrough domain */ pt_domain = protection_domain_alloc(); if (!pt_domain) return -ENOMEM; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b5801c311846..35be5802ac1e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1229,7 +1229,7 @@ x86_perf_event_set_period(struct perf_event *event, return 0; /* - * If we are way outside a reasoable range then just skip forward: + * If we are way outside a reasonable range then just skip forward: */ if (unlikely(left <= -period)) { left = period; diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d2b000..7d377379fa4a 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -514,7 +514,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they - * remain disabled thorough out this function. + * remain disabled throughout this function. */ static int __kprobes kprobe_handler(struct pt_regs *regs) { @@ -851,7 +851,7 @@ no_change: /* * Interrupts are disabled on entry as trap1 is an interrupt gate and they - * remain disabled thoroughout this function. + * remain disabled throughout this function. */ static int __kprobes post_kprobe_handler(struct pt_regs *regs) { diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 16ccbd77917f..d16d576beebf 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -203,7 +203,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) */ /* * Interrupts are disabled on entry as trap3 is an interrupt gate - * and they remain disabled thorough out this function. + * and they remain disabled throughout this function. */ int kmmio_handler(struct pt_regs *regs, unsigned long addr) { @@ -302,7 +302,7 @@ no_kmmio: /* * Interrupts are disabled on entry as trap1 is an interrupt gate - * and they remain disabled thorough out this function. + * and they remain disabled throughout this function. * This must always get called as the pair to kmmio_handler(). */ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) -- cgit v1.2.3 From 6070d81eb5f2d4943223c96e7609a53cdc984364 Mon Sep 17 00:00:00 2001 From: Adam Buchbinder Date: Fri, 4 Dec 2009 15:47:01 -0500 Subject: tree-wide: fix misspelling of "definition" in comments "Definition" is misspelled "defintion" in several comments; this patch fixes them. No code changes. Signed-off-by: Adam Buchbinder Signed-off-by: Jiri Kosina --- arch/x86/include/asm/sigcontext.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 72e5a4491661..04459d25e66e 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -124,7 +124,7 @@ struct sigcontext { * fpstate is really (struct _fpstate *) or (struct _xstate *) * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end - * of extended memory layout. See comments at the defintion of + * of extended memory layout. See comments at the definition of * (struct _fpx_sw_bytes) */ void __user *fpstate; /* zero when no FPU/extended context */ @@ -219,7 +219,7 @@ struct sigcontext { * fpstate is really (struct _fpstate *) or (struct _xstate *) * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end - * of extended memory layout. See comments at the defintion of + * of extended memory layout. See comments at the definition of * (struct _fpx_sw_bytes) */ void __user *fpstate; /* zero when no FPU/extended context */ -- cgit v1.2.3 From 575939cf548951dde8df0786899ea5a91bb669b2 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 24 Nov 2009 18:05:12 -0800 Subject: x86/PCI: claim SR-IOV BARs in pcibios_allocate_resource This allows us to use the BIOS SR-IOV allocations rather than assigning our own later on. Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- arch/x86/pci/i386.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index b73c09f45210..5dc9e8c63fcd 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -146,16 +146,29 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) } } +struct pci_check_idx_range { + int start; + int end; +}; + static void __init pcibios_allocate_resources(int pass) { struct pci_dev *dev = NULL; - int idx, disabled; + int idx, disabled, i; u16 command; struct resource *r; + struct pci_check_idx_range idx_range[] = { + { PCI_STD_RESOURCES, PCI_STD_RESOURCE_END }, +#ifdef CONFIG_PCI_IOV + { PCI_IOV_RESOURCES, PCI_IOV_RESOURCE_END }, +#endif + }; + for_each_pci_dev(dev) { pci_read_config_word(dev, PCI_COMMAND, &command); - for (idx = 0; idx < PCI_ROM_RESOURCE; idx++) { + for (i = 0; i < ARRAY_SIZE(idx_range); i++) + for (idx = idx_range[i].start; idx <= idx_range[i].end; idx++) { r = &dev->resource[idx]; if (r->parent) /* Already allocated */ continue; -- cgit v1.2.3 From 5d990b627537e59a3a2f039ff588a4750e9c1a6a Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Fri, 4 Dec 2009 12:15:21 -0800 Subject: PCI: add pci_request_acs Commit ae21ee65e8bc228416bbcc8a1da01c56a847a60c "PCI: acs p2p upsteram forwarding enabling" doesn't actually enable ACS. Add a function to pci core to allow an IOMMU to request that ACS be enabled. The existing mechanism of using iommu_found() in the pci core to know when ACS should be enabled doesn't actually work due to initialization order; iommu has only been detected not initialized. Have Intel and AMD IOMMUs request ACS, and Xen does as well during early init of dom0. Cc: Allen Kay Cc: David Woodhouse Cc: Jeremy Fitzhardinge Cc: Joerg Roedel Signed-off-by: Chris Wright Signed-off-by: Jesse Barnes --- arch/x86/kernel/amd_iommu_init.c | 2 ++ arch/x86/xen/enlighten.c | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index b4b61d462dcc..e60530a5f524 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1330,6 +1330,8 @@ void __init amd_iommu_detect(void) gart_iommu_aperture_disabled = 1; gart_iommu_aperture = 0; #endif + /* Make sure ACS will be enabled */ + pci_request_acs(); } } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 5bccd706232c..e2511bccbc8d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -1170,7 +1171,11 @@ asmlinkage void __init xen_start_kernel(void) add_preferred_console("xenboot", 0, NULL); add_preferred_console("tty", 0, NULL); add_preferred_console("hvc", 0, NULL); + } else { + /* Make sure ACS will be enabled */ + pci_request_acs(); } + xen_raw_console_write("about to get started...\n"); -- cgit v1.2.3 From 4832ddda2ec4df96ea1eed334ae2dbd65fc1f541 Mon Sep 17 00:00:00 2001 From: Leann Ogasawara Date: Fri, 4 Dec 2009 15:42:22 -0800 Subject: x86: ASUS P4S800 reboot=bios quirk Bug reporter noted their system with an ASUS P4S800 motherboard would hang when rebooting unless reboot=b was specified. Their dmidecode didn't contain descriptive System Information for Manufacturer or Product Name, so I used their Base Board Information to create a reboot quirk patch. The bug reporter confirmed this patch resolves the reboot hang. Handle 0x0001, DMI type 1, 25 bytes System Information Manufacturer: System Manufacturer Product Name: System Name Version: System Version Serial Number: SYS-1234567890 UUID: E0BFCD8B-7948-D911-A953-E486B4EEB67F Wake-up Type: Power Switch Handle 0x0002, DMI type 2, 8 bytes Base Board Information Manufacturer: ASUSTeK Computer INC. Product Name: P4S800 Version: REV 1.xx Serial Number: xxxxxxxxxxx BugLink: http://bugs.launchpad.net/bugs/366682 ASUS P4S800 will hang when rebooting unless reboot=b is specified. Add a quirk to reboot through the bios. Signed-off-by: Leann Ogasawara LKML-Reference: <1259972107.4629.275.camel@emiko> Signed-off-by: H. Peter Anvin Cc: --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f93078746e00..6caf26010970 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -259,6 +259,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), }, }, + { /* Handle problems with rebooting on ASUS P4S800 */ + .callback = set_bios_reboot, + .ident = "ASUS P4S800", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P4S800"), + }, + }, { } }; -- cgit v1.2.3 From 2f0993e0fb663c49e4d1e02654f6203246be4817 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 5 Dec 2009 07:06:10 +0100 Subject: hw-breakpoints: Drop callback and task parameters from modify helper Drop the callback and task parameters from modify_user_hw_breakpoint(). For now we have no user that need to modify a breakpoint to the point of changing its handler or its task context. Signed-off-by: Frederic Weisbecker Cc: "K. Prasad" --- arch/x86/kernel/ptrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 04d182a7cfdb..dbb395572ae2 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -618,7 +618,7 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, attr.bp_type = gen_type; attr.disabled = disabled; - return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); + return modify_user_hw_breakpoint(bp, &attr); } /* @@ -740,7 +740,7 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, attr = bp->attr; attr.bp_addr = addr; - bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); + bp = modify_user_hw_breakpoint(bp, &attr); } /* * CHECKME: the previous code returned -EIO if the addr wasn't a -- cgit v1.2.3 From b326e9560a28fc3e950637ef51847ed8f05c1335 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 5 Dec 2009 09:44:31 +0100 Subject: hw-breakpoints: Use overflow handler instead of the event callback struct perf_event::event callback was called when a breakpoint triggers. But this is a rather opaque callback, pretty tied-only to the breakpoint API and not really integrated into perf as it triggers even when we don't overflow. We prefer to use overflow_handler() as it fits into the perf events rules, being called only when we overflow. Reported-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: "K. Prasad" --- arch/x86/kernel/hw_breakpoint.c | 5 ++--- arch/x86/kernel/ptrace.c | 9 ++++++--- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index d42f65ac4927..05d5fec64a94 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -362,8 +362,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, return ret; } - if (bp->callback) - ret = arch_store_info(bp); + ret = arch_store_info(bp); if (ret < 0) return ret; @@ -519,7 +518,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) break; } - (bp->callback)(bp, args->regs); + perf_bp_event(bp, args->regs); rcu_read_unlock(); } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index dbb395572ae2..b361d28061d0 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -555,7 +555,9 @@ static int genregs_set(struct task_struct *target, return ret; } -static void ptrace_triggered(struct perf_event *bp, void *data) +static void ptrace_triggered(struct perf_event *bp, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) { int i; struct thread_struct *thread = &(current->thread); @@ -599,7 +601,7 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, { int err; int gen_len, gen_type; - DEFINE_BREAKPOINT_ATTR(attr); + struct perf_event_attr attr; /* * We shoud have at least an inactive breakpoint at this @@ -721,9 +723,10 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, { struct perf_event *bp; struct thread_struct *t = &tsk->thread; - DEFINE_BREAKPOINT_ATTR(attr); + struct perf_event_attr attr; if (!t->ptrace_bps[nr]) { + hw_breakpoint_init(&attr); /* * Put stub len and type to register (reserve) an inactive but * correct bp -- cgit v1.2.3 From 7f33f9c5cc3c99aeaf4d266a7ed502b828115a53 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 5 Dec 2009 12:01:17 +0100 Subject: x86/perf: Exclude the debug stack from the callchains Dumping the callchains from breakpoint events with perf gives strange results: 3.75% perf [kernel] [k] _raw_read_unlock | --- _raw_read_unlock perf_callchain perf_prepare_sample __perf_event_overflow perf_swevent_overflow perf_swevent_add perf_bp_event hw_breakpoint_exceptions_notify notifier_call_chain __atomic_notifier_call_chain atomic_notifier_call_chain notify_die do_debug debug munmap We are infected with all the debug stack. Like the nmi stack, the debug stack is undesired as it is part of the profiling path, not helpful for the user. Ignore it. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: "K. Prasad" --- arch/x86/kernel/cpu/perf_event.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c1bbed1021d9..d35f26076ae5 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2287,7 +2287,7 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); -static DEFINE_PER_CPU(int, in_nmi_frame); +static DEFINE_PER_CPU(int, in_ignored_frame); static void @@ -2303,8 +2303,9 @@ static void backtrace_warning(void *data, char *msg) static int backtrace_stack(void *data, char *name) { - per_cpu(in_nmi_frame, smp_processor_id()) = - x86_is_stack_id(NMI_STACK, name); + per_cpu(in_ignored_frame, smp_processor_id()) = + x86_is_stack_id(NMI_STACK, name) || + x86_is_stack_id(DEBUG_STACK, name); return 0; } @@ -2313,7 +2314,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - if (per_cpu(in_nmi_frame, smp_processor_id())) + if (per_cpu(in_ignored_frame, smp_processor_id())) return; if (reliable) -- cgit v1.2.3 From b625b3b3b740e177a1148594cd3ad5ff52f35315 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 6 Dec 2009 00:52:56 +0100 Subject: x86: Fixup wrong debug exception frame link in stacktraces While dumping a stacktrace, the end of the exception stack won't link the frame pointer to the previous stack. The interrupted stack will then be considered as unreliable and ignored by perf, as the frame pointer is unreliable itself. This happens because we overwrite the frame pointer that links to the interrupted frame with the address of the exception stack. This is done in order to reserve space inside. But rbp has been chosen here only because it is not a scratch register, so that the address of the exception stack remains in rbp after calling do_debug(), we can then release the exception stack space without the need to retrieve its address again. But we can pick another non-scratch register to do that, so that we preserve the link to the interrupted stack frame in the stacktraces. Just randomly choose r12. Every registers are saved just before and restored just after calling do_debug(). And r12 is not used in the middle, which makes it a perfect candidate. Example: perf record -g -a -c 1 -f -e mem:$(tasklist_lock_addr):rw Before: 44.18% [k] _raw_read_lock | | --- |--6.31%-- waitid | |--4.26%-- writev | |--3.63%-- __select | |--3.15%-- __waitpid | | | |--28.57%-- 0x8b52e00000139f | | | |--28.57%-- 0x8b52e0000013c6 | | | |--14.29%-- 0x7fde786dc000 | | | |--14.29%-- 0x62696c2f7273752f | | | --14.29%-- 0x1ea9df800000000 | |--3.00%-- __poll After: 43.94% [k] _raw_read_lock | --- _read_lock | |--60.53%-- send_sigio | __kill_fasync | kill_fasync | evdev_pass_event | evdev_event | input_pass_event | input_handle_event | input_event | synaptics_process_byte | psmouse_handle_byte | psmouse_interrupt | serio_interrupt | i8042_interrupt | handle_IRQ_event | handle_edge_irq | handle_irq | __irqentry_text_start | ret_from_intr | | | |--30.43%-- __select | | | |--17.39%-- 0x454f15 | | | |--13.04%-- __read | | | |--13.04%-- vread_hpet | | | |--13.04%-- _xcb_lock_io | | | --13.04%-- 0x7f630878ce87 Note: it does not only affect perf events but also other stacktraces in x86-64. They were considered as unreliable once we quit the debug stack frame. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: "K. Prasad" Cc: Thomas Gleixner Cc: "H. Peter Anvin" --- arch/x86/kernel/entry_64.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 722df1b1152d..0f08a0cea3e0 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1076,10 +1076,10 @@ ENTRY(\sym) TRACE_IRQS_OFF movq %rsp,%rdi /* pt_regs pointer */ xorl %esi,%esi /* no error code */ - PER_CPU(init_tss, %rbp) - subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) + PER_CPU(init_tss, %r12) + subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) call \do_sym - addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) + addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC END(\sym) -- cgit v1.2.3 From af2d8289f57e427836be482c6f72cca674028121 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 6 Dec 2009 05:34:27 +0100 Subject: x86: Fixup wrong irq frame link in stacktraces When we enter in irq, two things can happen to preserve the link to the previous frame pointer: - If we were in an irq already, we don't switch to the irq stack as we are inside. We just need to save the previous frame pointer and to link the new one to the previous. - Otherwise we need another level of indirection. We enter the irq with the previous stack. We save the previous bp inside and make bp pointing to its saved address. Then we switch to the irq stack and push bp another time but to the new stack. This makes two levels to dereference instead of one. In the second case, the current stacktrace code omits the second level and loses the frame pointer accuracy. The stack that follows will then be considered as unreliable. Handling that makes the perf callchain happier. Before: 43.94% [k] _raw_read_lock | --- _read_lock | |--60.53%-- send_sigio | __kill_fasync | kill_fasync | evdev_pass_event | evdev_event | input_pass_event | input_handle_event | input_event | synaptics_process_byte | psmouse_handle_byte | psmouse_interrupt | serio_interrupt | i8042_interrupt | handle_IRQ_event | handle_edge_irq | handle_irq | __irqentry_text_start | ret_from_intr | | | |--30.43%-- __select | | | |--17.39%-- 0x454f15 | | | |--13.04%-- __read | | | |--13.04%-- vread_hpet | | | |--13.04%-- _xcb_lock_io | | | --13.04%-- 0x7f630878ce8 After: 50.00% [k] _raw_read_lock | --- _read_lock | |--98.97%-- send_sigio | __kill_fasync | kill_fasync | evdev_pass_event | evdev_event | input_pass_event | input_handle_event | input_event | | | |--96.88%-- synaptics_process_byte | | psmouse_handle_byte | | psmouse_interrupt | | serio_interrupt | | i8042_interrupt | | handle_IRQ_event | | handle_edge_irq | | handle_irq | | __irqentry_text_start | | ret_from_intr | | | | | |--39.78%-- __const_udelay | | | | | | | |--91.89%-- ath5k_hw_register_timeout | | | | ath5k_hw_noise_floor_calibration | | | | ath5k_hw_reset | | | | ath5k_reset | | | | ath5k_config | | | | ieee80211_hw_config | | | | | | | | | |--88.24%-- ieee80211_scan_work | | | | | worker_thread | | | | | kthread | | | | | child_rip | | | | | | | | | --11.76%-- ieee80211_scan_completed | | | | ieee80211_scan_work | | | | worker_thread | | | | kthread | | | | child_rip | | | | | | | --8.11%-- ath5k_hw_noise_floor_calibration | | | ath5k_hw_reset | | | ath5k_reset | | | ath5k_config Note: This does not only affect perf events but also x86-64 stacktraces. They were considered as unreliable once we quit the irq stack frame. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: "K. Prasad" Cc: Thomas Gleixner Cc: "H. Peter Anvin" --- arch/x86/kernel/dumpstack_64.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index a071e6be177e..004b8aa6a35f 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -101,6 +101,35 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, return NULL; } +static inline int +in_irq_stack(unsigned long *stack, unsigned long *irq_stack, + unsigned long *irq_stack_end) +{ + return (stack >= irq_stack && stack < irq_stack_end); +} + +/* + * We are returning from the irq stack and go to the previous one. + * If the previous stack is also in the irq stack, then bp in the first + * frame of the irq stack points to the previous, interrupted one. + * Otherwise we have another level of indirection: We first save + * the bp of the previous stack, then we switch the stack to the irq one + * and save a new bp that links to the previous one. + * (See save_args()) + */ +static inline unsigned long +fixup_bp_irq_link(unsigned long bp, unsigned long *stack, + unsigned long *irq_stack, unsigned long *irq_stack_end) +{ +#ifdef CONFIG_FRAME_POINTER + struct stack_frame *frame = (struct stack_frame *)bp; + + if (!in_irq_stack(stack, irq_stack, irq_stack_end)) + return (unsigned long)frame->next_frame; +#endif + return bp; +} + /* * x86-64 can have up to three kernel stacks: * process stack @@ -173,7 +202,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, irq_stack = irq_stack_end - (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); - if (stack >= irq_stack && stack < irq_stack_end) { + if (in_irq_stack(stack, irq_stack, irq_stack_end)) { if (ops->stack(data, "IRQ") < 0) break; bp = print_context_stack(tinfo, stack, bp, @@ -184,6 +213,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, * pointer (index -1 to end) in the IRQ stack: */ stack = (unsigned long *) (irq_stack_end[-1]); + bp = fixup_bp_irq_link(bp, stack, irq_stack, + irq_stack_end); irq_stack_end = NULL; ops->stack(data, "EOI"); continue; -- cgit v1.2.3 From 8055039c2a2454c7159dcbde3161943b757a6e0e Mon Sep 17 00:00:00 2001 From: Shaun Patterson Date: Sat, 5 Dec 2009 10:41:34 -0500 Subject: x86: Fix typo in arch/x86/mm/kmmio.c Signed-off-by: Shaun Patterson Cc: Jiri Kosina Cc: pq@iki.fi LKML-Reference: <1260027694.10074.170.camel@linux-4lgc.site> Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 16ccbd77917f..72f157247ab1 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -490,7 +490,7 @@ static void remove_kmmio_fault_pages(struct rcu_head *head) * 2. remove_kmmio_fault_pages() * Remove the pages from kmmio_page_table. * 3. rcu_free_kmmio_fault_pages() - * Actally free the kmmio_fault_page structs as with RCU. + * Actually free the kmmio_fault_page structs as with RCU. */ void unregister_kmmio_probe(struct kmmio_probe *p) { -- cgit v1.2.3 From be2bf0a2dfbba785860284968fa055006eb1610e Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 6 Dec 2009 12:40:59 +0100 Subject: x86, perf probe: Fix warning in test_get_len() Fix the following warning: arch/x86/tools/test_get_len.c: In function "main": arch/x86/tools/test_get_len.c:116: warning: unused variable "c" Signed-off-by: Jean Delvare Cc: Masami Hiramatsu Signed-off-by: Ingo Molnar --- arch/x86/tools/test_get_len.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c index d8214dc03fa7..bee8d6ac2691 100644 --- a/arch/x86/tools/test_get_len.c +++ b/arch/x86/tools/test_get_len.c @@ -113,7 +113,7 @@ int main(int argc, char **argv) char line[BUFSIZE], sym[BUFSIZE] = ""; unsigned char insn_buf[16]; struct insn insn; - int insns = 0, c; + int insns = 0; int warnings = 0; parse_args(argc, argv); -- cgit v1.2.3 From cbe5c34c8c1f8ed1afbe6273f4ad57fcfad7822f Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Sun, 6 Dec 2009 20:14:29 +0900 Subject: x86: Compile insn.c and inat.c only for KPROBES At least, insn.c and inat.c is needed for kprobe for now. So, this compile those only if KPROBES is enabled. Signed-off-by: OGAWA Hirofumi Cc: Masami Hiramatsu LKML-Reference: <878wdg8icq.fsf@devron.myhome.or.jp> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 4 ++-- arch/x86/lib/Makefile | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 7d0b681a132b..0e90929da40f 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -187,8 +187,8 @@ config HAVE_MMIOTRACE_SUPPORT def_bool y config X86_DECODER_SELFTEST - bool "x86 instruction decoder selftest" - depends on DEBUG_KERNEL + bool "x86 instruction decoder selftest" + depends on DEBUG_KERNEL && KPROBES ---help--- Perform x86 instruction decoder selftests at build time. This option is useful for checking the sanity of x86 instruction diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index a2d6472895fb..442b3b3b2d80 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -20,7 +20,7 @@ lib-y := delay.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o -lib-y += insn.o inat.o +lib-$(CONFIG_KPROBES) += insn.o inat.o obj-y += msr-reg.o msr-reg-export.o -- cgit v1.2.3 From a946d8f11f0da9cfc714248036fcfd3a794d1e27 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 7 Dec 2009 12:59:46 +0100 Subject: x86: Fix bogus warning in apic_noop.apic_write() apic_noop is used to provide dummy apic functions. It's installed when the CPU has no APIC or when the APIC is disabled on the kernel command line. The apic_noop implementation of apic_write() warns when the CPU has an APIC or when the APIC is not disabled. That's bogus. The warning should only happen when the CPU has an APIC _AND_ the APIC is not disabled. apic_noop.apic_read() has the correct check. Signed-off-by: Thomas Gleixner Cc: Cyrill Gorcunov Cc: # in <= .32 this typo resides in native_apic_write_dummy() LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic_noop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index d9acc3bee0f4..e31b9ffe25f5 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -127,7 +127,7 @@ static u32 noop_apic_read(u32 reg) static void noop_apic_write(u32 reg, u32 v) { - WARN_ON_ONCE((cpu_has_apic || !disable_apic)); + WARN_ON_ONCE(cpu_has_apic && !disable_apic); } struct apic apic_noop = { -- cgit v1.2.3 From d32ba45503acf9c23b301eba2397ca2ee322627b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 7 Dec 2009 12:00:33 -0500 Subject: x86 insn: Delete empty or incomplete inat-tables.c Delete empty or incomplete inat-tables.c if gen-insn-attr-x86.awk failed, because it causes a build error if user tries to build kernel next time. Reported-by: Arkadiusz Miskiewicz Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Jens Axboe Cc: Frederic Weisbecker LKML-Reference: <20091207170033.19230.37688.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 442b3b3b2d80..45b20e486c2f 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -5,7 +5,7 @@ inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt quiet_cmd_inat_tables = GEN $@ - cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ + cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ $(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) $(call cmd,inat_tables) -- cgit v1.2.3 From bc09effabf0c5c6c7021e5ef9af15a23579b32a8 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 8 Dec 2009 11:21:37 +0900 Subject: x86/mce: Set up timer unconditionally mce_timer must be passed to setup_timer() in all cases, no matter whether it is going to be actually used. Otherwise, when the CPU gets brought down, its call to del_timer_sync() will never return, as the timer won't have a base associated, and hence lock_timer_base() will loop infinitely. Signed-off-by: Jan Beulich Signed-off-by: Hidetoshi Seto Cc: LKML-Reference: <4B1DB831.2030801@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d7ebf25d10ed..a96e5cd256a9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1388,13 +1388,14 @@ static void __mcheck_cpu_init_timer(void) struct timer_list *t = &__get_cpu_var(mce_timer); int *n = &__get_cpu_var(mce_next_interval); + setup_timer(t, mce_start_timer, smp_processor_id()); + if (mce_ignore_ce) return; *n = check_interval * HZ; if (!*n) return; - setup_timer(t, mce_start_timer, smp_processor_id()); t->expires = round_jiffies(jiffies + *n); add_timer_on(t, smp_processor_id()); } -- cgit v1.2.3 From 5c0e9f28da84c68ce0ae68b7a75faaf862e156e2 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 8 Dec 2009 16:52:44 +0900 Subject: x86, mce: fix confusion between bank attributes and mce attributes Commit cebe182033f156b430952370fb0f9dbe6e89b081 had an unnecessary, wrong change: &mce_banks[i].attr is equivalent to the former bank_attrs[i], not to mce_attrs[i]. Signed-off-by: Hidetoshi Seto Acked-by: Andi Kleen LKML-Reference: <4B1E05CC.4040703@jp.fujitsu.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a96e5cd256a9..a8aacd4b513c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1929,7 +1929,7 @@ error2: sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr); error: while (--i >= 0) - sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); + sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); sysdev_unregister(&per_cpu(mce_dev, cpu)); -- cgit v1.2.3 From f58e1f53de52a70391b6478617311207c7203363 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 8 Dec 2009 22:30:50 -0800 Subject: arch/x86/kernel/microcode*: Use pr_fmt() and remove duplicated KERN_ERR prefix - Use #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - Remove "microcode: " prefix from each pr_ - Fix duplicated KERN_ERR prefix - Coalesce pr_ format strings - Add a space after an exclamation point No other change in output. Signed-off-by: Joe Perches Cc: Andy Whitcroft Cc: Andreas Herrmann LKML-Reference: <1260340250.27677.191.camel@Joe-Laptop.home> Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 40 ++++++++++++++++----------------- arch/x86/kernel/microcode_core.c | 26 ++++++++++++---------- arch/x86/kernel/microcode_intel.c | 47 +++++++++++++++++---------------------- 3 files changed, 53 insertions(+), 60 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 63123d902103..37542b67c57e 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -13,6 +13,9 @@ * Licensed under the terms of the GNU General Public * License version 2. See file COPYING for details. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -81,7 +84,7 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) memset(csig, 0, sizeof(*csig)); rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); - pr_info("microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev); + pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev); return 0; } @@ -111,8 +114,8 @@ static int get_matching_microcode(int cpu, void *mc, int rev) /* ucode might be chipset specific -- currently we don't support this */ if (mc_header->nb_dev_id || mc_header->sb_dev_id) { - pr_err(KERN_ERR "microcode: CPU%d: loading of chipset " - "specific code not yet supported\n", cpu); + pr_err("CPU%d: loading of chipset specific code not yet supported\n", + cpu); return 0; } @@ -141,12 +144,12 @@ static int apply_microcode_amd(int cpu) /* check current patch id and patch's id for match */ if (rev != mc_amd->hdr.patch_id) { - pr_err("microcode: CPU%d: update failed " - "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id); + pr_err("CPU%d: update failed (for patch_level=0x%x)\n", + cpu, mc_amd->hdr.patch_id); return -1; } - pr_info("microcode: CPU%d: updated (new patch_level=0x%x)\n", cpu, rev); + pr_info("CPU%d: updated (new patch_level=0x%x)\n", cpu, rev); uci->cpu_sig.rev = rev; return 0; @@ -169,15 +172,14 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) return NULL; if (section_hdr[0] != UCODE_UCODE_TYPE) { - pr_err("microcode: error: invalid type field in " - "container file section header\n"); + pr_err("error: invalid type field in container file section header\n"); return NULL; } total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); if (total_size > size || total_size > UCODE_MAX_SIZE) { - pr_err("microcode: error: size mismatch\n"); + pr_err("error: size mismatch\n"); return NULL; } @@ -206,14 +208,13 @@ static int install_equiv_cpu_table(const u8 *buf) size = buf_pos[2]; if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { - pr_err("microcode: error: invalid type field in " - "container file section header\n"); + pr_err("error: invalid type field in container file section header\n"); return 0; } equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); if (!equiv_cpu_table) { - pr_err("microcode: failed to allocate equivalent CPU table\n"); + pr_err("failed to allocate equivalent CPU table\n"); return 0; } @@ -246,7 +247,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) offset = install_equiv_cpu_table(ucode_ptr); if (!offset) { - pr_err("microcode: failed to create equivalent cpu table\n"); + pr_err("failed to create equivalent cpu table\n"); return UCODE_ERROR; } @@ -277,8 +278,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) if (!leftover) { vfree(uci->mc); uci->mc = new_mc; - pr_debug("microcode: CPU%d found a matching microcode " - "update with version 0x%x (current=0x%x)\n", + pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); } else { vfree(new_mc); @@ -300,7 +300,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) return UCODE_NFOUND; if (*(u32 *)firmware->data != UCODE_MAGIC) { - pr_err("microcode: invalid UCODE_MAGIC (0x%08x)\n", + pr_err("invalid UCODE_MAGIC (0x%08x)\n", *(u32 *)firmware->data); return UCODE_ERROR; } @@ -313,8 +313,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) static enum ucode_state request_microcode_user(int cpu, const void __user *buf, size_t size) { - pr_info("microcode: AMD microcode update via " - "/dev/cpu/microcode not supported\n"); + pr_info("AMD microcode update via /dev/cpu/microcode not supported\n"); return UCODE_ERROR; } @@ -334,14 +333,13 @@ void init_microcode_amd(struct device *device) WARN_ON(c->x86_vendor != X86_VENDOR_AMD); if (c->x86 < 0x10) { - pr_warning("microcode: AMD CPU family 0x%x not supported\n", - c->x86); + pr_warning("AMD CPU family 0x%x not supported\n", c->x86); return; } supported_cpu = 1; if (request_firmware(&firmware, fw_name, device)) - pr_err("microcode: failed to load file %s\n", fw_name); + pr_err("failed to load file %s\n", fw_name); } void fini_microcode_amd(void) diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index e68aae397869..844c02c65fcb 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -70,6 +70,9 @@ * Fix sigmatch() macro to handle old CPUs with pf == 0. * Thanks to Stuart Swales for pointing out this bug. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -209,7 +212,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, ssize_t ret = -EINVAL; if ((len >> PAGE_SHIFT) > totalram_pages) { - pr_err("microcode: too much data (max %ld pages)\n", totalram_pages); + pr_err("too much data (max %ld pages)\n", totalram_pages); return ret; } @@ -244,7 +247,7 @@ static int __init microcode_dev_init(void) error = misc_register(µcode_dev); if (error) { - pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR); + pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR); return error; } @@ -359,7 +362,7 @@ static enum ucode_state microcode_resume_cpu(int cpu) if (!uci->mc) return UCODE_NFOUND; - pr_debug("microcode: CPU%d updated upon resume\n", cpu); + pr_debug("CPU%d updated upon resume\n", cpu); apply_microcode_on_target(cpu); return UCODE_OK; @@ -379,7 +382,7 @@ static enum ucode_state microcode_init_cpu(int cpu) ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); if (ustate == UCODE_OK) { - pr_debug("microcode: CPU%d updated upon init\n", cpu); + pr_debug("CPU%d updated upon init\n", cpu); apply_microcode_on_target(cpu); } @@ -406,7 +409,7 @@ static int mc_sysdev_add(struct sys_device *sys_dev) if (!cpu_online(cpu)) return 0; - pr_debug("microcode: CPU%d added\n", cpu); + pr_debug("CPU%d added\n", cpu); err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); if (err) @@ -425,7 +428,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev) if (!cpu_online(cpu)) return 0; - pr_debug("microcode: CPU%d removed\n", cpu); + pr_debug("CPU%d removed\n", cpu); microcode_fini_cpu(cpu); sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); return 0; @@ -473,15 +476,15 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) microcode_update_cpu(cpu); case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: - pr_debug("microcode: CPU%d added\n", cpu); + pr_debug("CPU%d added\n", cpu); if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) - pr_err("microcode: Failed to create group for CPU%d\n", cpu); + pr_err("Failed to create group for CPU%d\n", cpu); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: /* Suspend is in progress, only remove the interface */ sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - pr_debug("microcode: CPU%d removed\n", cpu); + pr_debug("CPU%d removed\n", cpu); break; case CPU_DEAD: case CPU_UP_CANCELED_FROZEN: @@ -507,7 +510,7 @@ static int __init microcode_init(void) microcode_ops = init_amd_microcode(); if (!microcode_ops) { - pr_err("microcode: no support for this CPU vendor\n"); + pr_err("no support for this CPU vendor\n"); return -ENODEV; } @@ -541,8 +544,7 @@ static int __init microcode_init(void) register_hotcpu_notifier(&mc_cpu_notifier); pr_info("Microcode Update Driver: v" MICROCODE_VERSION - " ," - " Peter Oruba\n"); + " , Peter Oruba\n"); return 0; } diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 0d334ddd0a96..ebd193e476ca 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -70,6 +70,9 @@ * Fix sigmatch() macro to handle old CPUs with pf == 0. * Thanks to Stuart Swales for pointing out this bug. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -146,8 +149,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || cpu_has(c, X86_FEATURE_IA64)) { - printk(KERN_ERR "microcode: CPU%d not a capable Intel " - "processor\n", cpu_num); + pr_err("CPU%d not a capable Intel processor\n", cpu_num); return -1; } @@ -165,8 +167,8 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); - printk(KERN_INFO "microcode: CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", - cpu_num, csig->sig, csig->pf, csig->rev); + pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", + cpu_num, csig->sig, csig->pf, csig->rev); return 0; } @@ -194,28 +196,24 @@ static int microcode_sanity_check(void *mc) data_size = get_datasize(mc_header); if (data_size + MC_HEADER_SIZE > total_size) { - printk(KERN_ERR "microcode: error! " - "Bad data size in microcode data file\n"); + pr_err("error! Bad data size in microcode data file\n"); return -EINVAL; } if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { - printk(KERN_ERR "microcode: error! " - "Unknown microcode update format\n"); + pr_err("error! Unknown microcode update format\n"); return -EINVAL; } ext_table_size = total_size - (MC_HEADER_SIZE + data_size); if (ext_table_size) { if ((ext_table_size < EXT_HEADER_SIZE) || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { - printk(KERN_ERR "microcode: error! " - "Small exttable size in microcode data file\n"); + pr_err("error! Small exttable size in microcode data file\n"); return -EINVAL; } ext_header = mc + MC_HEADER_SIZE + data_size; if (ext_table_size != exttable_size(ext_header)) { - printk(KERN_ERR "microcode: error! " - "Bad exttable size in microcode data file\n"); + pr_err("error! Bad exttable size in microcode data file\n"); return -EFAULT; } ext_sigcount = ext_header->count; @@ -230,8 +228,7 @@ static int microcode_sanity_check(void *mc) while (i--) ext_table_sum += ext_tablep[i]; if (ext_table_sum) { - printk(KERN_WARNING "microcode: aborting, " - "bad extended signature table checksum\n"); + pr_warning("aborting, bad extended signature table checksum\n"); return -EINVAL; } } @@ -242,7 +239,7 @@ static int microcode_sanity_check(void *mc) while (i--) orig_sum += ((int *)mc)[i]; if (orig_sum) { - printk(KERN_ERR "microcode: aborting, bad checksum\n"); + pr_err("aborting, bad checksum\n"); return -EINVAL; } if (!ext_table_size) @@ -255,7 +252,7 @@ static int microcode_sanity_check(void *mc) - (mc_header->sig + mc_header->pf + mc_header->cksum) + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); if (sum) { - printk(KERN_ERR "microcode: aborting, bad checksum\n"); + pr_err("aborting, bad checksum\n"); return -EINVAL; } } @@ -327,13 +324,11 @@ static int apply_microcode(int cpu) rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); if (val[1] != mc_intel->hdr.rev) { - printk(KERN_ERR "microcode: CPU%d update " - "to revision 0x%x failed\n", - cpu_num, mc_intel->hdr.rev); + pr_err("CPU%d update to revision 0x%x failed\n", + cpu_num, mc_intel->hdr.rev); return -1; } - printk(KERN_INFO "microcode: CPU%d updated to revision " - "0x%x, date = %04x-%02x-%02x \n", + pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x \n", cpu_num, val[1], mc_intel->hdr.date & 0xffff, mc_intel->hdr.date >> 24, @@ -362,8 +357,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, mc_size = get_totalsize(&mc_header); if (!mc_size || mc_size > leftover) { - printk(KERN_ERR "microcode: error!" - "Bad data in microcode data file\n"); + pr_err("error! Bad data in microcode data file\n"); break; } @@ -405,9 +399,8 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, vfree(uci->mc); uci->mc = (struct microcode_intel *)new_mc; - pr_debug("microcode: CPU%d found a matching microcode update with" - " version 0x%x (current=0x%x)\n", - cpu, new_rev, uci->cpu_sig.rev); + pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", + cpu, new_rev, uci->cpu_sig.rev); out: return state; } @@ -429,7 +422,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) c->x86, c->x86_model, c->x86_mask); if (request_firmware(&firmware, name, device)) { - pr_debug("microcode: data file %s load failed\n", name); + pr_debug("data file %s load failed\n", name); return UCODE_NFOUND; } -- cgit v1.2.3 From 44234adcdce38f83c56e05f808ce656175b4beeb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 9 Dec 2009 09:25:48 +0100 Subject: hw-breakpoints: Modify breakpoints without unregistering them Currently, when ptrace needs to modify a breakpoint, like disabling it, changing its address, type or len, it calls modify_user_hw_breakpoint(). This latter will perform the heavy and racy task of unregistering the old breakpoint and registering a new one. This is racy as someone else might steal the reserved breakpoint slot under us, which is undesired as the breakpoint is only supposed to be modified, sometimes in the middle of a debugging workflow. We don't want our slot to be stolen in the middle. So instead of unregistering/registering the breakpoint, just disable it while we modify its breakpoint fields and re-enable it after if necessary. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Prasad LKML-Reference: <1260347148-5519-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 57 ++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 31 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index b361d28061d0..7079ddaf0731 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -595,7 +595,7 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[]) return dr7; } -static struct perf_event * +static int ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, struct task_struct *tsk, int disabled) { @@ -609,11 +609,11 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, * written the address register first */ if (!bp) - return ERR_PTR(-EINVAL); + return -EINVAL; err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); if (err) - return ERR_PTR(err); + return err; attr = bp->attr; attr.bp_len = gen_len; @@ -658,28 +658,17 @@ restore: if (!second_pass) continue; - thread->ptrace_bps[i] = NULL; - bp = ptrace_modify_breakpoint(bp, len, type, + rc = ptrace_modify_breakpoint(bp, len, type, tsk, 1); - if (IS_ERR(bp)) { - rc = PTR_ERR(bp); - thread->ptrace_bps[i] = NULL; + if (rc) break; - } - thread->ptrace_bps[i] = bp; } continue; } - bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0); - - /* Incorrect bp, or we have a bug in bp API */ - if (IS_ERR(bp)) { - rc = PTR_ERR(bp); - thread->ptrace_bps[i] = NULL; + rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0); + if (rc) break; - } - thread->ptrace_bps[i] = bp; } /* * Make a second pass to free the remaining unused breakpoints @@ -737,26 +726,32 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, attr.disabled = 1; bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); + + /* + * CHECKME: the previous code returned -EIO if the addr wasn't + * a valid task virtual addr. The new one will return -EINVAL in + * this case. + * -EINVAL may be what we want for in-kernel breakpoints users, + * but -EIO looks better for ptrace, since we refuse a register + * writing for the user. And anyway this is the previous + * behaviour. + */ + if (IS_ERR(bp)) + return PTR_ERR(bp); + + t->ptrace_bps[nr] = bp; } else { + int err; + bp = t->ptrace_bps[nr]; - t->ptrace_bps[nr] = NULL; attr = bp->attr; attr.bp_addr = addr; - bp = modify_user_hw_breakpoint(bp, &attr); + err = modify_user_hw_breakpoint(bp, &attr); + if (err) + return err; } - /* - * CHECKME: the previous code returned -EIO if the addr wasn't a - * valid task virtual addr. The new one will return -EINVAL in this - * case. - * -EINVAL may be what we want for in-kernel breakpoints users, but - * -EIO looks better for ptrace, since we refuse a register writing - * for the user. And anyway this is the previous behaviour. - */ - if (IS_ERR(bp)) - return PTR_ERR(bp); - t->ptrace_bps[nr] = bp; return 0; } -- cgit v1.2.3 From 814e2c84a722c45650a9b8f52285d7ba6874f63b Mon Sep 17 00:00:00 2001 From: Andy Isaacson Date: Tue, 8 Dec 2009 00:29:42 -0800 Subject: x86: Factor duplicated code out of __show_regs() into show_regs_common() Unify x86_32 and x86_64 implementations of __show_regs() header, standardizing on the x86_64 format string in the process. Also, 32-bit will now call print_modules. Signed-off-by: Andy Isaacson Cc: Arjan van de Ven Cc: Robert Hancock Cc: Richard Zidlicky Cc: Andrew Morton LKML-Reference: <20091208082942.GA27174@hexapodia.org> [ v2: resolved conflict ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/system.h | 1 + arch/x86/kernel/process.c | 18 ++++++++++++++++++ arch/x86/kernel/process_32.c | 14 +------------- arch/x86/kernel/process_64.c | 16 ++-------------- 4 files changed, 22 insertions(+), 27 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 022a84386de8..ecb544e65382 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -23,6 +23,7 @@ struct task_struct *__switch_to(struct task_struct *prev, struct tss_struct; void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss); +extern void show_regs_common(void); #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5e2ba634ea15..90cf1250a005 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include #include #include @@ -90,6 +92,22 @@ void exit_thread(void) } } +void show_regs_common(void) +{ + const char *board; + + board = dmi_get_system_info(DMI_PRODUCT_NAME); + if (!board) + board = ""; + + printk("\n"); + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version, board); +} + void flush_thread(void) { struct task_struct *tsk = current; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 075580b35682..120b88797a75 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -35,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -128,7 +126,6 @@ void __show_regs(struct pt_regs *regs, int all) unsigned long d0, d1, d2, d3, d6, d7; unsigned long sp; unsigned short ss, gs; - const char *board; if (user_mode_vm(regs)) { sp = regs->sp; @@ -140,16 +137,7 @@ void __show_regs(struct pt_regs *regs, int all) savesegment(gs, gs); } - printk("\n"); - - board = dmi_get_system_info(DMI_PRODUCT_NAME); - if (!board) - board = ""; - printk("Pid: %d, comm: %s %s (%s %.*s) %s\n", - task_pid_nr(current), current->comm, - print_tainted(), init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version, board); + show_regs_common(); printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", (u16)regs->cs, regs->ip, regs->flags, diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c95c8f4e790a..e5ab0cd0ef36 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -38,7 +37,6 @@ #include #include #include -#include #include #include @@ -163,18 +161,8 @@ void __show_regs(struct pt_regs *regs, int all) unsigned long d0, d1, d2, d3, d6, d7; unsigned int fsindex, gsindex; unsigned int ds, cs, es; - const char *board; - - printk("\n"); - print_modules(); - board = dmi_get_system_info(DMI_PRODUCT_NAME); - if (!board) - board = ""; - printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version, board); + + show_regs_common(); printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk_address(regs->ip, 1); printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, -- cgit v1.2.3 From a1884b8e558ef6395f6033f9e1b69b332dd040e0 Mon Sep 17 00:00:00 2001 From: Andy Isaacson Date: Tue, 8 Dec 2009 00:30:21 -0800 Subject: x86: Print DMI_BOARD_NAME as well as DMI_PRODUCT_NAME from __show_regs() Robert Hancock observes that DMI_BOARD_NAME is often more useful than DMI_PRODUCT_NAME, especially on standalone motherboards. So, print both. Signed-off-by: Andy Isaacson Cc: Arjan van de Ven Cc: Robert Hancock Cc: Richard Zidlicky Cc: Andrew Morton LKML-Reference: <20091208083021.GB27174@hexapodia.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 90cf1250a005..7a7bd4e3ec49 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -94,18 +94,21 @@ void exit_thread(void) void show_regs_common(void) { - const char *board; + const char *board, *product; - board = dmi_get_system_info(DMI_PRODUCT_NAME); + board = dmi_get_system_info(DMI_BOARD_NAME); if (!board) board = ""; + product = dmi_get_system_info(DMI_PRODUCT_NAME); + if (!product) + product = ""; printk("\n"); - printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n", + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n", current->pid, current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), - init_utsname()->version, board); + init_utsname()->version, board, product); } void flush_thread(void) -- cgit v1.2.3 From 5cd476effe9570d2e520cf904d51f5c992972647 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 9 Dec 2009 10:45:33 -0800 Subject: x86: es7000_32.c: Use pr_ and add pr_fmt(fmt) - Added #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - Converted a few printk(KERN_INFO to pr_info( - Stripped "es7000_mipcfg" from pr_debug Signed-off-by: Joe Perches LKML-Reference: <3b4375af246dec5941168858910210937c110af9.1260383912.git.joe@perches.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/es7000_32.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index e85f8fb7f8e7..dd2b5f264643 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -27,6 +27,9 @@ * * http://www.unisys.com */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -223,9 +226,9 @@ static int parse_unisys_oem(char *oemptr) mip_addr = val; mip = (struct mip_reg *)val; mip_reg = __va(mip); - pr_debug("es7000_mipcfg: host_reg = 0x%lx \n", + pr_debug("host_reg = 0x%lx\n", (unsigned long)host_reg); - pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n", + pr_debug("mip_reg = 0x%lx\n", (unsigned long)mip_reg); success++; break; @@ -401,7 +404,7 @@ static void es7000_enable_apic_mode(void) if (!es7000_plat) return; - printk(KERN_INFO "ES7000: Enabling APIC mode.\n"); + pr_info("Enabling APIC mode.\n"); memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); es7000_mip_reg.off_0x00 = MIP_SW_APIC; es7000_mip_reg.off_0x38 = MIP_VALID; @@ -514,8 +517,7 @@ static void es7000_setup_apic_routing(void) { int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - printk(KERN_INFO - "Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", + pr_info("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", (apic_version[apic] == 0x14) ? "Physical Cluster" : "Logical Cluster", nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); -- cgit v1.2.3 From 40685236b3161b80030a4df34bcbc5941ea59876 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 9 Dec 2009 10:45:34 -0800 Subject: x86: setup_percpu.c: Use pr_ and add pr_fmt(fmt) - Added #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - Stripped PERCPU: from a pr_warning Signed-off-by: Joe Perches LKML-Reference: <7ead24eccbea8f2b11795abad3e2893a98e1e111.1260383912.git.joe@perches.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index d559af913e1f..35abcb8b00e9 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -20,9 +22,9 @@ #include #ifdef CONFIG_DEBUG_PER_CPU_MAPS -# define DBG(x...) printk(KERN_DEBUG x) +# define DBG(fmt, ...) pr_dbg(fmt, ##__VA_ARGS__) #else -# define DBG(x...) +# define DBG(fmt, ...) do { if (0) pr_dbg(fmt, ##__VA_ARGS__); } while (0) #endif DEFINE_PER_CPU(int, cpu_number); @@ -116,8 +118,8 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, } else { ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node), size, align, goal); - pr_debug("per cpu data for cpu%d %lu bytes on node%d at " - "%016lx\n", cpu, size, node, __pa(ptr)); + pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n", + cpu, size, node, __pa(ptr)); } return ptr; #else @@ -198,8 +200,7 @@ void __init setup_per_cpu_areas(void) pcpu_cpu_distance, pcpu_fc_alloc, pcpu_fc_free); if (rc < 0) - pr_warning("PERCPU: %s allocator failed (%d), " - "falling back to page size\n", + pr_warning("%s allocator failed (%d), falling back to page size\n", pcpu_fc_names[pcpu_chosen_fc], rc); } if (rc < 0) -- cgit v1.2.3 From a78d9626f4f6fa7904bfdb071205080743125983 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 9 Dec 2009 10:45:35 -0800 Subject: x86: i8254.c: Add pr_fmt(fmt) - Add pr_fmt(fmt) "pit: " fmt - Strip pit: prefixes from pr_debug Signed-off-by: Joe Perches LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kvm/i8254.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index fab7440c9bb2..296aba49472a 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -29,6 +29,8 @@ * Based on QEMU and Xen. */ +#define pr_fmt(fmt) "pit: " fmt + #include #include "irq.h" @@ -262,7 +264,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) static void destroy_pit_timer(struct kvm_timer *pt) { - pr_debug("pit: execute del timer!\n"); + pr_debug("execute del timer!\n"); hrtimer_cancel(&pt->timer); } @@ -284,7 +286,7 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); - pr_debug("pit: create pit timer, interval is %llu nsec\n", interval); + pr_debug("create pit timer, interval is %llu nsec\n", interval); /* TODO The new value only affected after the retriggered */ hrtimer_cancel(&pt->timer); @@ -309,7 +311,7 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) WARN_ON(!mutex_is_locked(&ps->lock)); - pr_debug("pit: load_count val is %d, channel is %d\n", val, channel); + pr_debug("load_count val is %d, channel is %d\n", val, channel); /* * The largest possible initial count is 0; this is equivalent @@ -395,8 +397,8 @@ static int pit_ioport_write(struct kvm_io_device *this, mutex_lock(&pit_state->lock); if (val != 0) - pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n", - (unsigned int)addr, len, val); + pr_debug("write addr is 0x%x, len is %d, val is 0x%x\n", + (unsigned int)addr, len, val); if (addr == 3) { channel = val >> 6; -- cgit v1.2.3 From 1bd591a5f17f546121fcf0015d72cc3e9c49cc29 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 9 Dec 2009 10:45:36 -0800 Subject: x86: kmmio.c: Add and use pr_fmt(fmt) - Add #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - Strip "kmmio: " from pr_s Signed-off-by: Joe Perches LKML-Reference: <7aa509f8a23933036d39f54bd51e9acc52068049.1260383912.git.joe@perches.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 88612cdcdc3b..68c3e89af5c2 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -5,6 +5,8 @@ * 2008 Pekka Paalanen */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -136,7 +138,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) pte_t *pte = lookup_address(f->page, &level); if (!pte) { - pr_err("kmmio: no pte for page 0x%08lx\n", f->page); + pr_err("no pte for page 0x%08lx\n", f->page); return -1; } @@ -148,7 +150,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) clear_pte_presence(pte, clear, &f->old_presence); break; default: - pr_err("kmmio: unexpected page level 0x%x.\n", level); + pr_err("unexpected page level 0x%x.\n", level); return -1; } @@ -170,13 +172,14 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) static int arm_kmmio_fault_page(struct kmmio_fault_page *f) { int ret; - WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); + WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n")); if (f->armed) { - pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", - f->page, f->count, !!f->old_presence); + pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n", + f->page, f->count, !!f->old_presence); } ret = clear_page_presence(f, true); - WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); + WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"), + f->page); f->armed = true; return ret; } @@ -240,24 +243,21 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) * condition needs handling by do_page_fault(), the * page really not being present is the most common. */ - pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n", - addr, smp_processor_id()); + pr_debug("secondary hit for 0x%08lx CPU %d.\n", + addr, smp_processor_id()); if (!faultpage->old_presence) - pr_info("kmmio: unexpected secondary hit for " - "address 0x%08lx on CPU %d.\n", addr, - smp_processor_id()); + pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n", + addr, smp_processor_id()); } else { /* * Prevent overwriting already in-flight context. * This should not happen, let's hope disarming at * least prevents a panic. */ - pr_emerg("kmmio: recursive probe hit on CPU %d, " - "for address 0x%08lx. Ignoring.\n", - smp_processor_id(), addr); - pr_emerg("kmmio: previous hit was at 0x%08lx.\n", - ctx->addr); + pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. Ignoring.\n", + smp_processor_id(), addr); + pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr); disarm_kmmio_fault_page(faultpage); } goto no_kmmio_ctx; @@ -316,8 +316,8 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) * something external causing them (f.e. using a debugger while * mmio tracing enabled), or erroneous behaviour */ - pr_warning("kmmio: unexpected debug trap on CPU %d.\n", - smp_processor_id()); + pr_warning("unexpected debug trap on CPU %d.\n", + smp_processor_id()); goto out; } @@ -425,7 +425,7 @@ int register_kmmio_probe(struct kmmio_probe *p) list_add_rcu(&p->list, &kmmio_probes); while (size < size_lim) { if (add_kmmio_fault_page(p->addr + size)) - pr_err("kmmio: Unable to set page fault.\n"); + pr_err("Unable to set page fault.\n"); size += PAGE_SIZE; } out: @@ -511,7 +511,7 @@ void unregister_kmmio_probe(struct kmmio_probe *p) drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); if (!drelease) { - pr_crit("kmmio: leaking kmmio_fault_page objects.\n"); + pr_crit("leaking kmmio_fault_page objects.\n"); return; } drelease->release_list = release_list; -- cgit v1.2.3 From 3a0340be06a9356eb61f6804107480acbe62c069 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 9 Dec 2009 10:45:37 -0800 Subject: x86: mmio-mod.c: Use pr_fmt - Add #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - Remove #define NAME - Remove NAME from pr_ Signed-off-by: Joe Perches LKML-Reference: <009cb214c45ef932df0242856228f4739cc91408.1260383912.git.joe@perches.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/mmio-mod.c | 71 +++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 36 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 132772a8ec57..4c765e9c4664 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -19,6 +19,9 @@ * * Derived from the read-mod example from relay-examples by Tom Zanussi. */ + +#define pr_fmt(fmt) "mmiotrace: " + #define DEBUG 1 #include @@ -36,8 +39,6 @@ #include "pf_in.h" -#define NAME "mmiotrace: " - struct trap_reason { unsigned long addr; unsigned long ip; @@ -96,17 +97,18 @@ static void print_pte(unsigned long address) pte_t *pte = lookup_address(address, &level); if (!pte) { - pr_err(NAME "Error in %s: no pte for page 0x%08lx\n", - __func__, address); + pr_err("Error in %s: no pte for page 0x%08lx\n", + __func__, address); return; } if (level == PG_LEVEL_2M) { - pr_emerg(NAME "4MB pages are not currently supported: " - "0x%08lx\n", address); + pr_emerg("4MB pages are not currently supported: 0x%08lx\n", + address); BUG(); } - pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address, + pr_info("pte for 0x%lx: 0x%llx 0x%llx\n", + address, (unsigned long long)pte_val(*pte), (unsigned long long)pte_val(*pte) & _PAGE_PRESENT); } @@ -118,22 +120,21 @@ static void print_pte(unsigned long address) static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) { const struct trap_reason *my_reason = &get_cpu_var(pf_reason); - pr_emerg(NAME "unexpected fault for address: 0x%08lx, " - "last fault for address: 0x%08lx\n", - addr, my_reason->addr); + pr_emerg("unexpected fault for address: 0x%08lx, last fault for address: 0x%08lx\n", + addr, my_reason->addr); print_pte(addr); print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); #ifdef __i386__ pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->ax, regs->bx, regs->cx, regs->dx); + regs->ax, regs->bx, regs->cx, regs->dx); pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->si, regs->di, regs->bp, regs->sp); + regs->si, regs->di, regs->bp, regs->sp); #else pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", - regs->ax, regs->cx, regs->dx); + regs->ax, regs->cx, regs->dx); pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", - regs->si, regs->di, regs->bp, regs->sp); + regs->si, regs->di, regs->bp, regs->sp); #endif put_cpu_var(pf_reason); BUG(); @@ -213,7 +214,7 @@ static void post(struct kmmio_probe *p, unsigned long condition, /* this should always return the active_trace count to 0 */ my_reason->active_traces--; if (my_reason->active_traces) { - pr_emerg(NAME "unexpected post handler"); + pr_emerg("unexpected post handler"); BUG(); } @@ -244,7 +245,7 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size, }; if (!trace) { - pr_err(NAME "kmalloc failed in ioremap\n"); + pr_err("kmalloc failed in ioremap\n"); return; } @@ -282,8 +283,8 @@ void mmiotrace_ioremap(resource_size_t offset, unsigned long size, if (!is_enabled()) /* recheck and proper locking in *_core() */ return; - pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n", - (unsigned long long)offset, size, addr); + pr_debug("ioremap_*(0x%llx, 0x%lx) = %p\n", + (unsigned long long)offset, size, addr); if ((filter_offset) && (offset != filter_offset)) return; ioremap_trace_core(offset, size, addr); @@ -301,7 +302,7 @@ static void iounmap_trace_core(volatile void __iomem *addr) struct remap_trace *tmp; struct remap_trace *found_trace = NULL; - pr_debug(NAME "Unmapping %p.\n", addr); + pr_debug("Unmapping %p.\n", addr); spin_lock_irq(&trace_lock); if (!is_enabled()) @@ -363,9 +364,8 @@ static void clear_trace_list(void) * Caller also ensures is_enabled() cannot change. */ list_for_each_entry(trace, &trace_list, list) { - pr_notice(NAME "purging non-iounmapped " - "trace @0x%08lx, size 0x%lx.\n", - trace->probe.addr, trace->probe.len); + pr_notice("purging non-iounmapped trace @0x%08lx, size 0x%lx.\n", + trace->probe.addr, trace->probe.len); if (!nommiotrace) unregister_kmmio_probe(&trace->probe); } @@ -387,7 +387,7 @@ static void enter_uniprocessor(void) if (downed_cpus == NULL && !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) { - pr_notice(NAME "Failed to allocate mask\n"); + pr_notice("Failed to allocate mask\n"); goto out; } @@ -395,20 +395,19 @@ static void enter_uniprocessor(void) cpumask_copy(downed_cpus, cpu_online_mask); cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus); if (num_online_cpus() > 1) - pr_notice(NAME "Disabling non-boot CPUs...\n"); + pr_notice("Disabling non-boot CPUs...\n"); put_online_cpus(); for_each_cpu(cpu, downed_cpus) { err = cpu_down(cpu); if (!err) - pr_info(NAME "CPU%d is down.\n", cpu); + pr_info("CPU%d is down.\n", cpu); else - pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); + pr_err("Error taking CPU%d down: %d\n", cpu, err); } out: if (num_online_cpus() > 1) - pr_warning(NAME "multiple CPUs still online, " - "may miss events.\n"); + pr_warning("multiple CPUs still online, may miss events.\n"); } /* __ref because leave_uniprocessor calls cpu_up which is __cpuinit, @@ -420,13 +419,13 @@ static void __ref leave_uniprocessor(void) if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0) return; - pr_notice(NAME "Re-enabling CPUs...\n"); + pr_notice("Re-enabling CPUs...\n"); for_each_cpu(cpu, downed_cpus) { err = cpu_up(cpu); if (!err) - pr_info(NAME "enabled CPU%d.\n", cpu); + pr_info("enabled CPU%d.\n", cpu); else - pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err); + pr_err("cannot re-enable CPU%d: %d\n", cpu, err); } } @@ -434,8 +433,8 @@ static void __ref leave_uniprocessor(void) static void enter_uniprocessor(void) { if (num_online_cpus() > 1) - pr_warning(NAME "multiple CPUs are online, may miss events. " - "Suggest booting with maxcpus=1 kernel argument.\n"); + pr_warning("multiple CPUs are online, may miss events. " + "Suggest booting with maxcpus=1 kernel argument.\n"); } static void leave_uniprocessor(void) @@ -450,13 +449,13 @@ void enable_mmiotrace(void) goto out; if (nommiotrace) - pr_info(NAME "MMIO tracing disabled.\n"); + pr_info("MMIO tracing disabled.\n"); kmmio_init(); enter_uniprocessor(); spin_lock_irq(&trace_lock); atomic_inc(&mmiotrace_enabled); spin_unlock_irq(&trace_lock); - pr_info(NAME "enabled.\n"); + pr_info("enabled.\n"); out: mutex_unlock(&mmiotrace_mutex); } @@ -475,7 +474,7 @@ void disable_mmiotrace(void) clear_trace_list(); /* guarantees: no more kmmio callbacks */ leave_uniprocessor(); kmmio_cleanup(); - pr_info(NAME "disabled.\n"); + pr_info("disabled.\n"); out: mutex_unlock(&mmiotrace_mutex); } -- cgit v1.2.3 From b7cc9554bc73641c9ed4d7eb74b2d6e78f20abea Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Dec 2009 11:03:39 +0100 Subject: x86/amd-iommu: Fix passthrough mode The data structure changes to use dev->archdata.iommu field broke the iommu=pt mode because in this case the dev->archdata.iommu was left uninitialized. This moves the inititalization of the devices into the main init function and fixes the problem. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_proto.h | 3 ++- arch/x86/kernel/amd_iommu.c | 39 ++++++++++++++++++++++++++++++++-- arch/x86/kernel/amd_iommu_init.c | 7 ++++++ 3 files changed, 46 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index 84786fb9a23b..2566e2606224 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -28,7 +28,8 @@ extern void amd_iommu_flush_all_domains(void); extern void amd_iommu_flush_all_devices(void); extern void amd_iommu_apply_erratum_63(u16 devid); extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); - +extern int amd_iommu_init_devices(void); +extern void amd_iommu_uninit_devices(void); #ifndef CONFIG_AMD_IOMMU_STATS static inline void amd_iommu_stats_init(void) { } diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 32fb09102a13..450dd6ac03d3 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -166,6 +166,43 @@ static void iommu_uninit_device(struct device *dev) { kfree(dev->archdata.iommu); } + +void __init amd_iommu_uninit_devices(void) +{ + struct pci_dev *pdev = NULL; + + for_each_pci_dev(pdev) { + + if (!check_device(&pdev->dev)) + continue; + + iommu_uninit_device(&pdev->dev); + } +} + +int __init amd_iommu_init_devices(void) +{ + struct pci_dev *pdev = NULL; + int ret = 0; + + for_each_pci_dev(pdev) { + + if (!check_device(&pdev->dev)) + continue; + + ret = iommu_init_device(&pdev->dev); + if (ret) + goto out_free; + } + + return 0; + +out_free: + + amd_iommu_uninit_devices(); + + return ret; +} #ifdef CONFIG_AMD_IOMMU_STATS /* @@ -2145,8 +2182,6 @@ static void prealloc_protection_domains(void) if (!check_device(&dev->dev)) continue; - iommu_init_device(&dev->dev); - /* Is there already any domain for it? */ if (domain_for_device(&dev->dev)) continue; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 7ffc39965233..df01c691d130 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1274,6 +1274,10 @@ static int __init amd_iommu_init(void) if (ret) goto free; + ret = amd_iommu_init_devices(); + if (ret) + goto free; + if (iommu_pass_through) ret = amd_iommu_init_passthrough(); else @@ -1296,6 +1300,9 @@ out: return ret; free: + + amd_iommu_uninit_devices(); + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, get_order(MAX_DOMAIN_ID/8)); -- cgit v1.2.3 From 8638c4914f34fedc1c13b1cc13f6d1e5a78c46b4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Dec 2009 11:12:25 +0100 Subject: x86/amd-iommu: Fix PCI hotplug with passthrough mode The device change notifier is initialized in the dma_ops initialization path. But this path is never executed for iommu=pt. Move the notifier initialization to IOMMU hardware init code to fix this. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_proto.h | 1 + arch/x86/kernel/amd_iommu.c | 7 +++++-- arch/x86/kernel/amd_iommu_init.c | 2 ++ 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index 2566e2606224..4d817f9e6e77 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -30,6 +30,7 @@ extern void amd_iommu_apply_erratum_63(u16 devid); extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); extern int amd_iommu_init_devices(void); extern void amd_iommu_uninit_devices(void); +extern void amd_iommu_init_notifier(void); #ifndef CONFIG_AMD_IOMMU_STATS static inline void amd_iommu_stats_init(void) { } diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 450dd6ac03d3..a83185080e91 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1624,6 +1624,11 @@ static struct notifier_block device_nb = { .notifier_call = device_change_notifier, }; +void amd_iommu_init_notifier(void) +{ + bus_register_notifier(&pci_bus_type, &device_nb); +} + /***************************************************************************** * * The next functions belong to the dma_ops mapping/unmapping code. @@ -2250,8 +2255,6 @@ int __init amd_iommu_init_dma_ops(void) register_iommu(&amd_iommu_ops); - bus_register_notifier(&pci_bus_type, &device_nb); - amd_iommu_stats_init(); return 0; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index df01c691d130..309a52f96e0b 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1285,6 +1285,8 @@ static int __init amd_iommu_init(void) if (ret) goto free; + amd_iommu_init_notifier(); + enable_iommus(); if (iommu_pass_through) -- cgit v1.2.3 From 6b2f3d1f769be5779b479c37800229d9a4809fc3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Oct 2009 11:05:28 +0100 Subject: vfs: Implement proper O_SYNC semantics While Linux provided an O_SYNC flag basically since day 1, it took until Linux 2.4.0-test12pre2 to actually get it implemented for filesystems, since that day we had generic_osync_around with only minor changes and the great "For now, when the user asks for O_SYNC, we'll actually give O_DSYNC" comment. This patch intends to actually give us real O_SYNC semantics in addition to the O_DSYNC semantics. After Jan's O_SYNC patches which are required before this patch it's actually surprisingly simple, we just need to figure out when to set the datasync flag to vfs_fsync_range and when not. This patch renames the existing O_SYNC flag to O_DSYNC while keeping it's numerical value to keep binary compatibility, and adds a new real O_SYNC flag. To guarantee backwards compatiblity it is defined as expanding to both the O_DSYNC and the new additional binary flag (__O_SYNC) to make sure we are backwards-compatible when compiled against the new headers. This also means that all places that don't care about the differences can just check O_DSYNC and get the right behaviour for O_SYNC, too - only places that actuall care need to check __O_SYNC in addition. Drivers and network filesystems have been updated in a fail safe way to always do the full sync magic if O_DSYNC is set. The few places setting O_SYNC for lower layers are kept that way for now to stay failsafe. We enforce that O_DSYNC is set when __O_SYNC is set early in the open path to make sure we always get these sane options. Note that parisc really screwed up their headers as they already define a O_DSYNC that has always been a no-op. We try to repair it by using it for the new O_DSYNC and redefinining O_SYNC to send both the traditional O_SYNC numerical value _and_ the O_DSYNC one. Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Grant Grundler Cc: "David S. Miller" Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Al Viro Cc: Andreas Dilger Acked-by: Trond Myklebust Acked-by: Kyle McMartin Acked-by: Ulrich Drepper Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Jan Kara --- arch/x86/mm/pat.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 66b55d6e69ed..ae9648eb1c7f 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -704,9 +704,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, if (!range_is_allowed(pfn, size)) return 0; - if (file->f_flags & O_SYNC) { + if (file->f_flags & O_DSYNC) flags = _PAGE_CACHE_UC_MINUS; - } #ifdef CONFIG_X86_32 /* -- cgit v1.2.3 From 5e855db5d8fec44e6604eb245aa9077bbd3f0d05 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 10 Dec 2009 17:08:54 +0800 Subject: perf_event: Fix variable initialization in other codepaths Signed-off-by: Xiao Guangrong Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Paul Mackerras LKML-Reference: <4B20BAA6.7010609@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index d35f26076ae5..1342f236e32a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1632,6 +1632,7 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) data.period = event->hw.last_period; data.addr = 0; + data.raw = NULL; regs.ip = 0; /* @@ -1749,6 +1750,7 @@ static int p6_pmu_handle_irq(struct pt_regs *regs) u64 val; data.addr = 0; + data.raw = NULL; cpuc = &__get_cpu_var(cpu_hw_events); @@ -1794,6 +1796,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) u64 ack, status; data.addr = 0; + data.raw = NULL; cpuc = &__get_cpu_var(cpu_hw_events); @@ -1857,6 +1860,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) u64 val; data.addr = 0; + data.raw = NULL; cpuc = &__get_cpu_var(cpu_hw_events); -- cgit v1.2.3 From 125580380f418000b1a06d9a54700f1191b6e561 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 10 Dec 2009 19:56:34 +0300 Subject: x86, perf events: Check if we have APIC enabled Ralf Hildebrandt reported this boot warning: | Running a vanilla 2.6.32 as Xen DomU, I'm getting: | | [ 0.000999] CPU: Physical Processor ID: 0 | [ 0.000999] CPU: Processor Core ID: 1 | [ 0.000999] Performance Events: AMD PMU driver. | [ 0.000999] ------------[ cut here ]------------ | [ 0.000999] WARNING: at arch/x86/kernel/apic/apic.c:249 native_apic_write_dummy So we need to check if APIC functionality is available, and not just in the P6 driver but elsewhere as well. Reported-by: Ralf Hildebrandt Signed-off-by: Cyrill Gorcunov Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20091210165634.GF5086@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1342f236e32a..18f05eccbb62 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2066,12 +2066,6 @@ static __init int p6_pmu_init(void) x86_pmu = p6_pmu; - if (!cpu_has_apic) { - pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); - pr_info("no hardware sampling interrupt available.\n"); - x86_pmu.apic = 0; - } - return 0; } @@ -2163,6 +2157,16 @@ static __init int amd_pmu_init(void) return 0; } +static void __init pmu_check_apic(void) +{ + if (cpu_has_apic) + return; + + x86_pmu.apic = 0; + pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); + pr_info("no hardware sampling interrupt available.\n"); +} + void __init init_hw_perf_events(void) { int err; @@ -2184,6 +2188,8 @@ void __init init_hw_perf_events(void) return; } + pmu_check_apic(); + pr_cont("%s PMU driver.\n", x86_pmu.name); if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { -- cgit v1.2.3 From f8b7256096a20436f6d0926747e3ac3d64c81d24 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 30 Nov 2009 17:37:04 -0500 Subject: Unify sys_mmap* New helper - sys_mmap_pgoff(); switch syscalls to using it. Acked-by: David S. Miller Signed-off-by: Al Viro --- arch/x86/ia32/ia32entry.S | 2 +- arch/x86/ia32/sys_ia32.c | 43 +------------------------------------- arch/x86/include/asm/sys_ia32.h | 3 --- arch/x86/include/asm/syscalls.h | 2 -- arch/x86/kernel/sys_i386_32.c | 27 +----------------------- arch/x86/kernel/sys_x86_64.c | 17 +-------------- arch/x86/kernel/syscall_table_32.S | 2 +- 7 files changed, 5 insertions(+), 91 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 4eefdca9832b..53147ad85b96 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -696,7 +696,7 @@ ia32_sys_call_table: .quad quiet_ni_syscall /* streams2 */ .quad stub32_vfork /* 190 */ .quad compat_sys_getrlimit - .quad sys32_mmap2 + .quad sys_mmap_pgoff .quad sys32_truncate64 .quad sys32_ftruncate64 .quad sys32_stat64 /* 195 */ diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index df82c0e48ded..422572c77923 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -155,9 +155,6 @@ struct mmap_arg_struct { asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg) { struct mmap_arg_struct a; - struct file *file = NULL; - unsigned long retval; - struct mm_struct *mm ; if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; @@ -165,22 +162,8 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg) if (a.offset & ~PAGE_MASK) return -EINVAL; - if (!(a.flags & MAP_ANONYMOUS)) { - file = fget(a.fd); - if (!file) - return -EBADF; - } - - mm = current->mm; - down_write(&mm->mmap_sem); - retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, + return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset>>PAGE_SHIFT); - if (file) - fput(file); - - up_write(&mm->mmap_sem); - - return retval; } asmlinkage long sys32_mprotect(unsigned long start, size_t len, @@ -483,30 +466,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd, return ret; } -asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - struct mm_struct *mm = current->mm; - unsigned long error; - struct file *file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - return -EBADF; - } - - down_write(&mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(&mm->mmap_sem); - - if (file) - fput(file); - return error; -} - asmlinkage long sys32_olduname(struct oldold_utsname __user *name) { char *arch = "x86_64"; diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 9af9decb38c3..4a5a089e1c62 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -57,9 +57,6 @@ asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); asmlinkage long sys32_personality(unsigned long); asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); -asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long, - unsigned long, unsigned long, unsigned long); - struct oldold_utsname; struct old_utsname; asmlinkage long sys32_olduname(struct oldold_utsname __user *); diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 372b76edd63f..1bb6e395881c 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -55,8 +55,6 @@ struct sel_arg_struct; struct oldold_utsname; struct old_utsname; -asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, - unsigned long, unsigned long, unsigned long); asmlinkage int old_mmap(struct mmap_arg_struct __user *); asmlinkage int old_select(struct sel_arg_struct __user *); asmlinkage int sys_ipc(uint, int, int, int, void __user *, long); diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 1884a8d12bfa..dee1ff7cba58 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -24,31 +24,6 @@ #include -asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - int error = -EBADF; - struct file *file = NULL; - struct mm_struct *mm = current->mm; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(&mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(&mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - /* * Perform the select(nd, in, out, ex, tv) and mmap() system * calls. Linux/i386 didn't use to be able to handle more than @@ -77,7 +52,7 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) if (a.offset & ~PAGE_MASK) goto out; - err = sys_mmap2(a.addr, a.len, a.prot, a.flags, + err = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); out: return err; diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 45e00eb09c3a..8aa2057efd12 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -23,26 +23,11 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, fd, unsigned long, off) { long error; - struct file *file; - error = -EINVAL; if (off & ~PAGE_MASK) goto out; - error = -EBADF; - file = NULL; - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, off >> PAGE_SHIFT); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); + error = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); out: return error; } diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 70c2125d55b9..15228b5d3eb7 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -191,7 +191,7 @@ ENTRY(sys_call_table) .long sys_ni_syscall /* reserved for streams2 */ .long ptregs_vfork /* 190 */ .long sys_getrlimit - .long sys_mmap2 + .long sys_mmap_pgoff .long sys_truncate64 .long sys_ftruncate64 .long sys_stat64 /* 195 */ -- cgit v1.2.3 From a5d09d68335bb8422d5e7050c9f03f99ba6cfebd Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Fri, 11 Dec 2009 08:43:12 -0600 Subject: kgdb,x86: remove redundant test The for loop starts with a breakno of 0, and ends when it's 4. so this test is always true. Signed-off-by: Roel Kluin Signed-off-by: Andrew Morton Signed-off-by: Jason Wessel --- arch/x86/kernel/kgdb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 20a5b3689463..f93d015753ce 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -220,8 +220,7 @@ static void kgdb_correct_hw_break(void) dr7 |= ((breakinfo[breakno].len << 2) | breakinfo[breakno].type) << ((breakno << 2) + 16); - if (breakno >= 0 && breakno <= 3) - set_debugreg(breakinfo[breakno].addr, breakno); + set_debugreg(breakinfo[breakno].addr, breakno); } else { if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { -- cgit v1.2.3 From cf6f196d112a6f6757b1ca3cce0b576f7abee479 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 11 Dec 2009 08:43:16 -0600 Subject: kgdb,i386: Fix corner case access to ss with NMI watch dog exception It is possible for the user_mode_vm(regs) check to return true on the i368 arch for a non master kgdb cpu or when the master kgdb cpu handles the NMI watch dog exception. The solution is simply to select the correct gdb_ss location based on the check to user_mode_vm(regs). CC: Ingo Molnar Acked-by: H. Peter Anvin Signed-off-by: Jason Wessel --- arch/x86/kernel/kgdb.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index f93d015753ce..aefae46aa646 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -86,9 +86,15 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs[GDB_DS] = regs->ds; gdb_regs[GDB_ES] = regs->es; gdb_regs[GDB_CS] = regs->cs; - gdb_regs[GDB_SS] = __KERNEL_DS; gdb_regs[GDB_FS] = 0xFFFF; gdb_regs[GDB_GS] = 0xFFFF; + if (user_mode_vm(regs)) { + gdb_regs[GDB_SS] = regs->ss; + gdb_regs[GDB_SP] = regs->sp; + } else { + gdb_regs[GDB_SS] = __KERNEL_DS; + gdb_regs[GDB_SP] = kernel_stack_pointer(regs); + } #else gdb_regs[GDB_R8] = regs->r8; gdb_regs[GDB_R9] = regs->r9; @@ -101,8 +107,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs32[GDB_PS] = regs->flags; gdb_regs32[GDB_CS] = regs->cs; gdb_regs32[GDB_SS] = regs->ss; -#endif gdb_regs[GDB_SP] = kernel_stack_pointer(regs); +#endif } /** -- cgit v1.2.3 From 8097551d9ab9b9e3630694ad1bc6e12c597c515e Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 11 Dec 2009 08:43:18 -0600 Subject: kgdb,x86: do not set kgdb_single_step on x86 On an SMP system the kgdb_single_step flag has the possibility to indefinitely hang the system in the case. Consider the case where, CPU 1 has the schedule lock and CPU 0 is set to single step, there is no way for CPU 0 to run another task. The easy way to observe the problem is to make 2 cpus busy, and run the kgdb test suite. You will see that it hangs the system very quickly. while [ 1 ] ; do find /proc > /dev/null 2>&1 ; done & while [ 1 ] ; do find /proc > /dev/null 2>&1 ; done & echo V1 > /sys/module/kgdbts/parameters/kgdbts The side effect of this patch is that there is the possibility to miss a breakpoint in the case that a single step operation was executed to step over a breakpoint in common code. The trade off of the missed breakpoint is preferred to hanging the kernel. This can be fixed in the future by using kprobes or another strategy to step over planted breakpoints with out of line execution. CC: Ingo Molnar Signed-off-by: Jason Wessel --- arch/x86/kernel/kgdb.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index aefae46aa646..dd74fe7273b1 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -400,7 +400,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, /* set the trace bit if we're stepping */ if (remcomInBuffer[0] == 's') { linux_regs->flags |= X86_EFLAGS_TF; - kgdb_single_step = 1; atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); } -- cgit v1.2.3