From c14d26905d256565052dce99c35b02470162e679 Mon Sep 17 00:00:00 2001
From: Alex Williamson
Date: Thu, 30 May 2013 12:39:18 -0600
Subject: iommu/{vt-d,amd}: Remove multifunction assumption around grouping

If a device is multifunction and does not have ACS enabled then we
assume that the entire package lacks ACS and use function 0 as the base
of the group.  The PCIe spec however states that components are
permitted to implement ACS on some, none, or all of their applicable
functions.  It's therefore conceivable that function 0 may be fully
independent and support ACS while other functions do not.

Instead use the lowest function of the slot that does not have ACS
enabled as the base of the group.  This may be the current device,
which is intentional.  So long as we use a consistent algorithm, all
the non-ACS functions will be grouped together and ACS functions will
get separate groups.

Signed-off-by: Alex Williamson
Signed-off-by: Joerg Roedel
---
 drivers/iommu/amd_iommu.c | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

(limited to 'drivers/iommu/amd_iommu.c')

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 21d02b0d907c..5a02d0776a66 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -287,14 +287,27 @@ static struct pci_dev *get_isolation_root(struct pci_dev *pdev)
 
 	/*
 	 * If it's a multifunction device that does not support our
-	 * required ACS flags, add to the same group as function 0.
+	 * required ACS flags, add to the same group as lowest numbered
+	 * function that also does not support the required ACS flags.
 	 */
 	if (dma_pdev->multifunction &&
-	    !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS))
-		swap_pci_ref(&dma_pdev,
-			     pci_get_slot(dma_pdev->bus,
-					  PCI_DEVFN(PCI_SLOT(dma_pdev->devfn),
-					  0)));
+	    !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) {
+		u8 i, slot = PCI_SLOT(dma_pdev->devfn);
+
+		for (i = 0; i < 8; i++) {
+			struct pci_dev *tmp;
+
+			tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i));
+			if (!tmp)
+				continue;
+
+			if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) {
+				swap_pci_ref(&dma_pdev, tmp);
+				break;
+			}
+			pci_dev_put(tmp);
+		}
+	}
 
 	/*
	 * Devices on the root bus go through the iommu.  If that's not us,
--
cgit v1.2.3
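Aside (not part of the commit): a minimal user-space sketch of the grouping
rule described in the log above.  The acs_enabled[] table stands in for
pci_acs_enabled(), and group_base() is an invented helper; the function
numbers and ACS values are made-up examples.

/*
 * Illustration of the rule: a function that lacks ACS is grouped with the
 * lowest-numbered function in its slot that also lacks ACS; ACS-capable
 * functions keep their own group.
 */
#include <stdio.h>
#include <stdbool.h>

#define MAX_FUNCS 8

/* Hypothetical per-function ACS capability for one multifunction slot */
static const bool acs_enabled[MAX_FUNCS] = {
	true,	/* function 0 implements ACS */
	false,	/* function 1 does not       */
	false,	/* function 2 does not       */
	true,	/* function 3 implements ACS */
};

/* Return the function whose group this function joins */
static int group_base(int fn)
{
	int i;

	if (acs_enabled[fn])
		return fn;	/* ACS-capable: isolated on its own */

	/* Lowest-numbered function that also lacks ACS (may be fn itself) */
	for (i = 0; i < MAX_FUNCS; i++)
		if (!acs_enabled[i])
			return i;

	return fn;
}

int main(void)
{
	int fn;

	for (fn = 0; fn < 4; fn++)
		printf("function %d -> group base function %d\n",
		       fn, group_base(fn));
	/*
	 * Functions 1 and 2 share function 1 as their group base, while
	 * functions 0 and 3 (ACS-capable) remain separate; the old code
	 * would have grouped 1 and 2 under function 0.
	 */
	return 0;
}

The kernel loop above reaches the same result by scanning the slot's
functions in order and stopping at the first one that fails
pci_acs_enabled().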
From 5c34c403b72395e59cad64a75fb0b772b0ab9cd4 Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Thu, 20 Jun 2013 20:22:58 +0200
Subject: iommu/amd: Fix memory leak in free_pagetable

The IOMMU pagetables can have up to 6 levels, but the code in
free_pagetable() only releases the first 3 levels.  Fix this leak by
releasing all levels.

Reported-by: Alex Williamson
Signed-off-by: Joerg Roedel
Reviewed-by: Alex Williamson
---
 drivers/iommu/amd_iommu.c | 73 +++++++++++++++++++++++++++++++----------------
 1 file changed, 49 insertions(+), 24 deletions(-)

(limited to 'drivers/iommu/amd_iommu.c')

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 21d02b0d907c..5cde682377a0 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1893,34 +1893,59 @@ static void domain_id_free(int id)
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 }
 
+#define DEFINE_FREE_PT_FN(LVL, FN)				\
+static void free_pt_##LVL (unsigned long __pt)			\
+{								\
+	unsigned long p;					\
+	u64 *pt;						\
+	int i;							\
+								\
+	pt = (u64 *)__pt;					\
+								\
+	for (i = 0; i < 512; ++i) {				\
+		if (!IOMMU_PTE_PRESENT(pt[i]))			\
+			continue;				\
+								\
+		p = (unsigned long)IOMMU_PTE_PAGE(pt[i]);	\
+		FN(p);						\
+	}							\
+	free_page((unsigned long)pt);				\
+}
+
+DEFINE_FREE_PT_FN(l2, free_page)
+DEFINE_FREE_PT_FN(l3, free_pt_l2)
+DEFINE_FREE_PT_FN(l4, free_pt_l3)
+DEFINE_FREE_PT_FN(l5, free_pt_l4)
+DEFINE_FREE_PT_FN(l6, free_pt_l5)
+
 static void free_pagetable(struct protection_domain *domain)
 {
-	int i, j;
-	u64 *p1, *p2, *p3;
-
-	p1 = domain->pt_root;
-
-	if (!p1)
-		return;
-
-	for (i = 0; i < 512; ++i) {
-		if (!IOMMU_PTE_PRESENT(p1[i]))
-			continue;
+	unsigned long root = (unsigned long)domain->pt_root;
 
-		p2 = IOMMU_PTE_PAGE(p1[i]);
-		for (j = 0; j < 512; ++j) {
-			if (!IOMMU_PTE_PRESENT(p2[j]))
-				continue;
-			p3 = IOMMU_PTE_PAGE(p2[j]);
-			free_page((unsigned long)p3);
-		}
-
-		free_page((unsigned long)p2);
+	switch (domain->mode) {
+	case PAGE_MODE_NONE:
+		break;
+	case PAGE_MODE_1_LEVEL:
+		free_page(root);
+		break;
+	case PAGE_MODE_2_LEVEL:
+		free_pt_l2(root);
+		break;
+	case PAGE_MODE_3_LEVEL:
+		free_pt_l3(root);
+		break;
+	case PAGE_MODE_4_LEVEL:
+		free_pt_l4(root);
+		break;
+	case PAGE_MODE_5_LEVEL:
+		free_pt_l5(root);
+		break;
+	case PAGE_MODE_6_LEVEL:
+		free_pt_l6(root);
+		break;
+	default:
+		BUG();
 	}
-
-	free_page((unsigned long)p1);
-
-	domain->pt_root = NULL;
 }
 
 static void free_gcr3_tbl_level1(u64 *tbl)
--
cgit v1.2.3
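Aside (not part of the patch): the level-by-level free can be modelled in
plain user-space C.  The sketch below replaces the DEFINE_FREE_PT_FN(l2..l6)
chain with an explicit recursion over a toy table; ENTRIES, PRESENT,
alloc_table() and free_table() are invented for the illustration, while the
kernel uses 512 entries per level and IOMMU_PTE_PRESENT().

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define ENTRIES	8
#define PRESENT	1ULL		/* low bit marks a populated entry */

static int pages_allocated;

/* Allocate one table page and, down to level 1, populate entry 0 */
static uint64_t *alloc_table(int level)
{
	uint64_t *pt = calloc(ENTRIES, sizeof(*pt));

	pages_allocated++;
	if (level > 1)
		pt[0] = (uint64_t)(uintptr_t)alloc_table(level - 1) | PRESENT;
	return pt;
}

/*
 * Free a table and every table below it; the kernel defines one function
 * per level (free_pt_l2 .. free_pt_l6), and this recursion expresses the
 * same idea.  Leaf entries at level 1 are not followed, matching the
 * kernel, which does not own the data pages they point to.
 */
static void free_table(uint64_t *pt, int level)
{
	int i;

	for (i = 0; i < ENTRIES; i++) {
		if (!(pt[i] & PRESENT))
			continue;
		if (level > 1)
			free_table((uint64_t *)(uintptr_t)(pt[i] & ~PRESENT),
				   level - 1);
	}
	free(pt);
	pages_allocated--;
}

int main(void)
{
	/* Build a 6-level table, then release all levels as the fix does */
	uint64_t *root = alloc_table(6);

	free_table(root, 6);
	printf("pages still allocated: %d\n", pages_allocated);	/* prints 0 */
	return 0;
}

In the kernel, free_pagetable() picks the right entry point from
domain->mode, so every populated table page is released regardless of depth.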
From 60d0ca3cfd199b6612bbbbf4999a3470dad38bb1 Mon Sep 17 00:00:00 2001
From: Alex Williamson
Date: Fri, 21 Jun 2013 14:33:19 -0600
Subject: iommu/amd: Only unmap large pages from the first pte

If we use a large mapping, the expectation is that only unmaps from the
first pte in the superpage are supported.  Unmaps from offsets into the
superpage should fail (i.e. return a zero-sized unmap).  In the current
code, unmapping from an offset clears the size of the full mapping
starting from that offset.  For instance, if we map a 16k physically
contiguous range at IOVA 0x0 with a large page, then attempt to unmap 4k
at offset 12k, 4 ptes are cleared (12k - 28k) and the unmap returns 16k
unmapped.  This can incorrectly clear valid mappings and confuse drivers
like VFIO that use the unmap size to release pinned pages.  Fix by
refusing to unmap from offsets into the page.

Signed-off-by: Alex Williamson
Cc: stable@vger.kernel.org
Signed-off-by: Joerg Roedel
---
 drivers/iommu/amd_iommu.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'drivers/iommu/amd_iommu.c')

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 5cde682377a0..4b029c17f8d0 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1484,6 +1484,10 @@ static unsigned long iommu_unmap_page(struct protection_domain *dom,
 
 			/* Large PTE found which maps this address */
 			unmap_size = PTE_PAGE_SIZE(*pte);
+
+			/* Only unmap from the first pte in the page */
+			if ((unmap_size - 1) & bus_addr)
+				break;
 			count      = PAGE_SIZE_PTE_COUNT(unmap_size);
 			for (i = 0; i < count; i++)
 				pte[i] = 0ULL;
@@ -1493,7 +1497,7 @@ static unsigned long iommu_unmap_page(struct protection_domain *dom,
 		unmapped += unmap_size;
 	}
 
-	BUG_ON(!is_power_of_2(unmapped));
+	BUG_ON(unmapped && !is_power_of_2(unmapped));
 
 	return unmapped;
 }
--
cgit v1.2.3
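Aside (not from the patch): the new test, (unmap_size - 1) & bus_addr, is a
standard power-of-two alignment check.  The short sketch below walks the 16k
example from the log; the addresses and size are made up for illustration.

#include <stdio.h>

int main(void)
{
	unsigned long unmap_size = 16 * 1024;		 /* 16k large page */
	unsigned long addrs[] = { 0x0, 0x3000, 0x4000 }; /* 0k, 12k, 16k   */
	int i;

	for (i = 0; i < 3; i++) {
		unsigned long bus_addr = addrs[i];

		/* Non-zero low bits mean the address is inside a superpage */
		if ((unmap_size - 1) & bus_addr)
			printf("0x%lx: offset into the superpage -> refuse (0 bytes unmapped)\n",
			       bus_addr);
		else
			printf("0x%lx: first pte of a superpage -> unmap %lu bytes\n",
			       bus_addr, unmap_size);
	}
	return 0;
}

A refused unmap clears no ptes and can leave unmapped at 0, which is why the
BUG_ON is relaxed to tolerate a zero-sized result.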