From 937582382c71b75b29fbb92615629494e1a05ac0 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Fri, 17 Apr 2009 16:42:14 +0800 Subject: x86, intr-remap: enable interrupt remapping early Currently, when x2apic is not enabled, interrupt remapping will be enabled in init_dmars(), where it is too late to remap ioapic interrupts, that is, ioapic interrupts are really in compatibility mode, not remappable mode. This patch always enables interrupt remapping before ioapic setup, it guarantees all interrupts will be remapped when interrupt remapping is enabled. Thus it doesn't need to set the compatibility interrupt bit. [ Impact: refactor intr-remap init sequence, enable fuller remap mode ] Signed-off-by: Suresh Siddha Signed-off-by: Weidong Han Acked-by: David Woodhouse Cc: iommu@lists.linux-foundation.org Cc: allen.m.kay@intel.com Cc: fenghua.yu@intel.com LKML-Reference: <1239957736-6161-4-git-send-email-weidong.han@intel.com> Signed-off-by: Ingo Molnar --- drivers/pci/intr_remapping.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'drivers/pci/intr_remapping.c') diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index f5e0ea724a6f..5c2142656e96 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -423,20 +423,6 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) readl, (sts & DMA_GSTS_IRTPS), sts); spin_unlock_irqrestore(&iommu->register_lock, flags); - if (mode == 0) { - spin_lock_irqsave(&iommu->register_lock, flags); - - /* enable comaptiblity format interrupt pass through */ - cmd = iommu->gcmd | DMA_GCMD_CFI; - iommu->gcmd |= DMA_GCMD_CFI; - writel(cmd, iommu->reg + DMAR_GCMD_REG); - - IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, - readl, (sts & DMA_GSTS_CFIS), sts); - - spin_unlock_irqrestore(&iommu->register_lock, flags); - } - /* * global invalidation of interrupt entry cache before enabling * interrupt-remapping. @@ -516,6 +502,20 @@ end: spin_unlock_irqrestore(&iommu->register_lock, flags); } +int __init intr_remapping_supported(void) +{ + struct dmar_drhd_unit *drhd; + + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; + + if (!ecap_ir_support(iommu->ecap)) + return 0; + } + + return 1; +} + int __init enable_intr_remapping(int eim) { struct dmar_drhd_unit *drhd; -- cgit v1.2.3 From 03ea81550676296d94596e4337c771c6ba29f542 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Fri, 17 Apr 2009 16:42:15 +0800 Subject: x86, intr-remap: add option to disable interrupt remapping Add option "nointremap" to disable interrupt remapping. [ Impact: add new boot option ] Signed-off-by: Suresh Siddha Signed-off-by: Weidong Han Acked-by: David Woodhouse Cc: iommu@lists.linux-foundation.org Cc: allen.m.kay@intel.com Cc: fenghua.yu@intel.com LKML-Reference: <1239957736-6161-5-git-send-email-weidong.han@intel.com> Signed-off-by: Ingo Molnar --- drivers/pci/intr_remapping.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/pci/intr_remapping.c') diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 5c2142656e96..842039e4955b 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -15,6 +15,14 @@ static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; static int ir_ioapic_num; int intr_remapping_enabled; +static int disable_intremap; +static __init int setup_nointremap(char *str) +{ + disable_intremap = 1; + return 0; +} +early_param("nointremap", setup_nointremap); + struct irq_2_iommu { struct intel_iommu *iommu; u16 irte_index; @@ -506,6 +514,9 @@ int __init intr_remapping_supported(void) { struct dmar_drhd_unit *drhd; + if (disable_intremap) + return 0; + for_each_drhd_unit(drhd) { struct intel_iommu *iommu = drhd->iommu; -- cgit v1.2.3 From 85ac16d033370caf6f48d743c8dc8103700f5cc5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 18:00:38 -0700 Subject: x86/irq: change irq_desc_alloc() to take node instead of cpu This simplifies the node awareness of the code. All our allocators only deal with a NUMA node ID locality not with CPU ids anyway - so there's no need to maintain (and transform) a CPU id all across the IRq layer. v2: keep move_irq_desc related [ Impact: cleanup, prepare IRQ code to be NUMA-aware ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell Cc: Jeremy Fitzhardinge LKML-Reference: <49F65536.2020300@kernel.org> Signed-off-by: Ingo Molnar --- drivers/pci/intr_remapping.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'drivers/pci/intr_remapping.c') diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index f5e0ea724a6f..9eff36a293e0 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -23,15 +23,12 @@ struct irq_2_iommu { }; #ifdef CONFIG_GENERIC_HARDIRQS -static struct irq_2_iommu *get_one_free_irq_2_iommu(int cpu) +static struct irq_2_iommu *get_one_free_irq_2_iommu(int node) { struct irq_2_iommu *iommu; - int node; - - node = cpu_to_node(cpu); iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node); - printk(KERN_DEBUG "alloc irq_2_iommu on cpu %d node %d\n", cpu, node); + printk(KERN_DEBUG "alloc irq_2_iommu on node %d\n", node); return iommu; } @@ -48,7 +45,7 @@ static struct irq_2_iommu *irq_2_iommu(unsigned int irq) return desc->irq_2_iommu; } -static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu) +static struct irq_2_iommu *irq_2_iommu_alloc_node(unsigned int irq, int node) { struct irq_desc *desc; struct irq_2_iommu *irq_iommu; @@ -56,7 +53,7 @@ static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu) /* * alloc irq desc if not allocated already. */ - desc = irq_to_desc_alloc_cpu(irq, cpu); + desc = irq_to_desc_alloc_node(irq, node); if (!desc) { printk(KERN_INFO "can not get irq_desc for %d\n", irq); return NULL; @@ -65,14 +62,14 @@ static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu) irq_iommu = desc->irq_2_iommu; if (!irq_iommu) - desc->irq_2_iommu = get_one_free_irq_2_iommu(cpu); + desc->irq_2_iommu = get_one_free_irq_2_iommu(node); return desc->irq_2_iommu; } static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq) { - return irq_2_iommu_alloc_cpu(irq, boot_cpu_id); + return irq_2_iommu_alloc_node(irq, cpu_to_node(boot_cpu_id)); } #else /* !CONFIG_SPARSE_IRQ */ -- cgit v1.2.3 From c416daa98a584596df21ee2c26fac6579ee58f57 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 10 May 2009 20:30:58 +0100 Subject: intel-iommu: Tidy up iommu->gcmd handling Signed-off-by: David Woodhouse --- drivers/pci/intr_remapping.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'drivers/pci/intr_remapping.c') diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index f5e0ea724a6f..166959614087 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -404,7 +404,7 @@ int free_irte(int irq) static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) { u64 addr; - u32 cmd, sts; + u32 sts; unsigned long flags; addr = virt_to_phys((void *)iommu->ir_table->base); @@ -415,9 +415,8 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE); /* Set interrupt-remapping table pointer */ - cmd = iommu->gcmd | DMA_GCMD_SIRTP; iommu->gcmd |= DMA_GCMD_SIRTP; - writel(cmd, iommu->reg + DMAR_GCMD_REG); + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_IRTPS), sts); @@ -427,9 +426,8 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) spin_lock_irqsave(&iommu->register_lock, flags); /* enable comaptiblity format interrupt pass through */ - cmd = iommu->gcmd | DMA_GCMD_CFI; iommu->gcmd |= DMA_GCMD_CFI; - writel(cmd, iommu->reg + DMAR_GCMD_REG); + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_CFIS), sts); @@ -446,9 +444,8 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) spin_lock_irqsave(&iommu->register_lock, flags); /* Enable interrupt-remapping */ - cmd = iommu->gcmd | DMA_GCMD_IRE; iommu->gcmd |= DMA_GCMD_IRE; - writel(cmd, iommu->reg + DMAR_GCMD_REG); + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_IRES), sts); -- cgit v1.2.3 From c4658b4e777bebf69884f4884a9bfb2f84dd71d9 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Sat, 23 May 2009 00:41:14 +0800 Subject: Intel-IOMMU, intr-remap: set the whole 128bits of irte when modify/free it Interrupt remapping table entry is 128bits. Currently, it only sets low 64bits of irte in modify_irte and free_irte. This ignores high 64bits setting of irte, that means source-id setting will be ignored. This patch sets the whole 128bits of irte when modify/free it. Following source-id checking patch depends on this. Signed-off-by: Weidong Han Signed-off-by: David Woodhouse --- drivers/pci/intr_remapping.c | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) (limited to 'drivers/pci/intr_remapping.c') diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 1e83c8c5f985..44025a0c2bb6 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -314,7 +314,8 @@ int modify_irte(int irq, struct irte *irte_modified) index = irq_iommu->irte_index + irq_iommu->sub_handle; irte = &iommu->ir_table->base[index]; - set_64bit((unsigned long *)irte, irte_modified->low); + set_64bit((unsigned long *)&irte->low, irte_modified->low); + set_64bit((unsigned long *)&irte->high, irte_modified->high); __iommu_flush_cache(iommu, irte, sizeof(*irte)); rc = qi_flush_iec(iommu, index, 0); @@ -369,12 +370,32 @@ struct intel_iommu *map_dev_to_ir(struct pci_dev *dev) return drhd->iommu; } +static int clear_entries(struct irq_2_iommu *irq_iommu) +{ + struct irte *start, *entry, *end; + struct intel_iommu *iommu; + int index; + + if (irq_iommu->sub_handle) + return 0; + + iommu = irq_iommu->iommu; + index = irq_iommu->irte_index + irq_iommu->sub_handle; + + start = iommu->ir_table->base + index; + end = start + (1 << irq_iommu->irte_mask); + + for (entry = start; entry < end; entry++) { + set_64bit((unsigned long *)&entry->low, 0); + set_64bit((unsigned long *)&entry->high, 0); + } + + return qi_flush_iec(iommu, index, irq_iommu->irte_mask); +} + int free_irte(int irq) { int rc = 0; - int index, i; - struct irte *irte; - struct intel_iommu *iommu; struct irq_2_iommu *irq_iommu; unsigned long flags; @@ -385,16 +406,7 @@ int free_irte(int irq) return -1; } - iommu = irq_iommu->iommu; - - index = irq_iommu->irte_index + irq_iommu->sub_handle; - irte = &iommu->ir_table->base[index]; - - if (!irq_iommu->sub_handle) { - for (i = 0; i < (1 << irq_iommu->irte_mask); i++) - set_64bit((unsigned long *)(irte + i), 0); - rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask); - } + rc = clear_entries(irq_iommu); irq_iommu->iommu = NULL; irq_iommu->irte_index = 0; -- cgit v1.2.3 From f007e99c8e2e322b8331aba72414715119a2920d Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Sat, 23 May 2009 00:41:15 +0800 Subject: Intel-IOMMU, intr-remap: source-id checking To support domain-isolation usages, the platform hardware must be capable of uniquely identifying the requestor (source-id) for each interrupt message. Without source-id checking for interrupt remapping , a rouge guest/VM with assigned devices can launch interrupt attacks to bring down anothe guest/VM or the VMM itself. This patch adds source-id checking for interrupt remapping, and then really isolates interrupts for guests/VMs with assigned devices. Because PCI subsystem is not initialized yet when set up IOAPIC entries, use read_pci_config_byte to access PCI config space directly. Signed-off-by: Weidong Han Signed-off-by: David Woodhouse --- drivers/pci/intr_remapping.c | 120 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 117 insertions(+), 3 deletions(-) (limited to 'drivers/pci/intr_remapping.c') diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 44025a0c2bb6..4f5b8712931f 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -10,6 +10,8 @@ #include #include "intr_remapping.h" #include +#include +#include "pci.h" static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; static int ir_ioapic_num; @@ -418,6 +420,91 @@ int free_irte(int irq) return rc; } +/* + * source validation type + */ +#define SVT_NO_VERIFY 0x0 /* no verification is required */ +#define SVT_VERIFY_SID_SQ 0x1 /* verify using SID and SQ fiels */ +#define SVT_VERIFY_BUS 0x2 /* verify bus of request-id */ + +/* + * source-id qualifier + */ +#define SQ_ALL_16 0x0 /* verify all 16 bits of request-id */ +#define SQ_13_IGNORE_1 0x1 /* verify most significant 13 bits, ignore + * the third least significant bit + */ +#define SQ_13_IGNORE_2 0x2 /* verify most significant 13 bits, ignore + * the second and third least significant bits + */ +#define SQ_13_IGNORE_3 0x3 /* verify most significant 13 bits, ignore + * the least three significant bits + */ + +/* + * set SVT, SQ and SID fields of irte to verify + * source ids of interrupt requests + */ +static void set_irte_sid(struct irte *irte, unsigned int svt, + unsigned int sq, unsigned int sid) +{ + irte->svt = svt; + irte->sq = sq; + irte->sid = sid; +} + +int set_ioapic_sid(struct irte *irte, int apic) +{ + int i; + u16 sid = 0; + + if (!irte) + return -1; + + for (i = 0; i < MAX_IO_APICS; i++) { + if (ir_ioapic[i].id == apic) { + sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn; + break; + } + } + + if (sid == 0) { + pr_warning("Failed to set source-id of IOAPIC (%d)\n", apic); + return -1; + } + + set_irte_sid(irte, 1, 0, sid); + + return 0; +} + +int set_msi_sid(struct irte *irte, struct pci_dev *dev) +{ + struct pci_dev *bridge; + + if (!irte || !dev) + return -1; + + /* PCIe device or Root Complex integrated PCI device */ + if (dev->is_pcie || !dev->bus->parent) { + set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, + (dev->bus->number << 8) | dev->devfn); + return 0; + } + + bridge = pci_find_upstream_pcie_bridge(dev); + if (bridge) { + if (bridge->is_pcie) /* this is a PCIE-to-PCI/PCIX bridge */ + set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16, + (bridge->bus->number << 8) | dev->bus->number); + else /* this is a legacy PCI bridge */ + set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, + (bridge->bus->number << 8) | bridge->devfn); + } + + return 0; +} + static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) { u64 addr; @@ -624,6 +711,35 @@ error: return -1; } +static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope, + struct intel_iommu *iommu) +{ + struct acpi_dmar_pci_path *path; + u8 bus; + int count; + + bus = scope->bus; + path = (struct acpi_dmar_pci_path *)(scope + 1); + count = (scope->length - sizeof(struct acpi_dmar_device_scope)) + / sizeof(struct acpi_dmar_pci_path); + + while (--count > 0) { + /* + * Access PCI directly due to the PCI + * subsystem isn't initialized yet. + */ + bus = read_pci_config_byte(bus, path->dev, path->fn, + PCI_SECONDARY_BUS); + path++; + } + + ir_ioapic[ir_ioapic_num].bus = bus; + ir_ioapic[ir_ioapic_num].devfn = PCI_DEVFN(path->dev, path->fn); + ir_ioapic[ir_ioapic_num].iommu = iommu; + ir_ioapic[ir_ioapic_num].id = scope->enumeration_id; + ir_ioapic_num++; +} + static int ir_parse_ioapic_scope(struct acpi_dmar_header *header, struct intel_iommu *iommu) { @@ -648,9 +764,7 @@ static int ir_parse_ioapic_scope(struct acpi_dmar_header *header, " 0x%Lx\n", scope->enumeration_id, drhd->address); - ir_ioapic[ir_ioapic_num].iommu = iommu; - ir_ioapic[ir_ioapic_num].id = scope->enumeration_id; - ir_ioapic_num++; + ir_parse_one_ioapic_scope(scope, iommu); } start += scope->length; } -- cgit v1.2.3