From 158e8bfe802f730f9ea7cde32eee8b43285bdd4a Mon Sep 17 00:00:00 2001 From: Alessandro Rubini Date: Sun, 24 Jun 2012 12:46:26 +0100 Subject: ARM: 7432/1: use the new linux/sizes.h Signed-off-by: Alessandro Rubini Acked-by: Giancarlo Asnaghi Acked-by: Linus Walleij Cc: Alan Cox Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 2 +- arch/arm/mm/init.c | 2 +- arch/arm/mm/ioremap.c | 2 +- arch/arm/mm/mmu.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ea6b43154090..30a031c0fcf5 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -23,12 +23,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index c21d06c7dd7e..ad7fd8ae8258 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -21,13 +21,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 4f55f5062ab7..566750fa57d4 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -32,7 +33,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index e5dad60b558b..2196116c882f 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -16,13 +16,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include #include -- cgit v1.2.3 From 9ad86ddde0345e903eb74837e52cf19fe0c7f825 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 25 Jun 2012 14:59:38 +0100 Subject: ARM: 7436/1: Do not map the vectors page as write-through on UP systems The vectors page has been traditionally mapped as WT on UP systems but this creates a mismatched alias with the directly mapped RAM that is using WB attributes. On newer processors like Cortex-A15 this has implications on the data/instructions coherency at the point of unification (usually L2). This patch removes such restriction. Signed-off-by: Catalin Marinas Tested-by: Will Deacon Signed-off-by: Russell King --- arch/arm/mm/mmu.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index e5dad60b558b..f37dc1856a69 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -421,12 +421,6 @@ static void __init build_mem_type_table(void) cp = &cache_policies[cachepolicy]; vecs_pgprot = kern_pgprot = user_pgprot = cp->pte; - /* - * Only use write-through for non-SMP systems - */ - if (!is_smp() && cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH) - vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte; - /* * Enable CPU-specific coherency if supported. * (Only available on XSC3 at the moment.) -- cgit v1.2.3 From 575320d625d5b5eb115575a1f5e17af456e69577 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:43:03 +0100 Subject: ARM: 7445/1: mm: update CONTEXTIDR register to contain PID of current process This patch introduces a new Kconfig option which, when enabled, causes the kernel to write the PID of the current task into the PROCID field of the CONTEXTIDR on context switch. This is useful when analysing hardware trace, since writes to this register can be configured to emit an event into the trace stream. 
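As a rough illustration (the helper below is hypothetical and not part of this patch), on classic 2-level page table systems the register then carries the hardware ASID in bits [7:0] and the task PID in bits [31:8]:

/* Hypothetical sketch, not part of the patch: decode CONTEXTIDR as laid
 * out by CONFIG_PID_IN_CONTEXTIDR on a classic 2-level (non-LPAE) system. */
static inline void decode_contextidr(u32 *pid, u32 *asid)
{
	u32 contextidr;

	asm volatile("mrc p15, 0, %0, c13, c0, 1" : "=r" (contextidr));
	*asid = contextidr & 0xff;	/* hardware ASID, bits [7:0]      */
	*pid  = contextidr >> 8;	/* PROCID = task PID, bits [31:8] */
}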
The thread notifier for writing the PID is deliberately kept separate from the ASID-writing code so that we can support newer processors using LPAE, where the ASID is stored in TTBR0. As such, the switch_mm code is updated to perform a read-modify-write sequence to ensure that we don't clobber the PID on CPUs using the classic 2-level page tables. Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/mm/context.c | 35 +++++++++++++++++++++++++++++++++++ arch/arm/mm/proc-v6.S | 6 ++++++ arch/arm/mm/proc-v7-2level.S | 5 +++++ 3 files changed, 46 insertions(+) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 806cc4f63516..119bc52ab93e 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -14,6 +14,7 @@ #include #include +#include #include static DEFINE_RAW_SPINLOCK(cpu_asid_lock); @@ -48,6 +49,40 @@ void cpu_set_reserved_ttbr0(void) } #endif +#ifdef CONFIG_PID_IN_CONTEXTIDR +static int contextidr_notifier(struct notifier_block *unused, unsigned long cmd, + void *t) +{ + u32 contextidr; + pid_t pid; + struct thread_info *thread = t; + + if (cmd != THREAD_NOTIFY_SWITCH) + return NOTIFY_DONE; + + pid = task_pid_nr(thread->task) << ASID_BITS; + asm volatile( + " mrc p15, 0, %0, c13, c0, 1\n" + " bfi %1, %0, #0, %2\n" + " mcr p15, 0, %1, c13, c0, 1\n" + : "=r" (contextidr), "+r" (pid) + : "I" (ASID_BITS)); + isb(); + + return NOTIFY_OK; +} + +static struct notifier_block contextidr_notifier_block = { + .notifier_call = contextidr_notifier, +}; + +static int __init contextidr_notifier_init(void) +{ + return thread_register_notifier(&contextidr_notifier_block); +} +arch_initcall(contextidr_notifier_init); +#endif + /* * We fork()ed a process, and we need a new context for the child * to run in. 
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 5900cd520e84..86b8b480634f 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -107,6 +107,12 @@ ENTRY(cpu_v6_switch_mm) mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB mcr p15, 0, r2, c7, c10, 4 @ drain write buffer mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 +#ifdef CONFIG_PID_IN_CONTEXTIDR + mrc p15, 0, r2, c13, c0, 1 @ read current context ID + bic r2, r2, #0xff @ extract the PID + and r1, r1, #0xff + orr r1, r1, r2 @ insert into new context ID +#endif mcr p15, 0, r1, c13, c0, 1 @ set context ID #endif mov pc, lr diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index 42ac069c8012..fd045e706390 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -46,6 +46,11 @@ ENTRY(cpu_v7_switch_mm) #ifdef CONFIG_ARM_ERRATA_430973 mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB #endif +#ifdef CONFIG_PID_IN_CONTEXTIDR + mrc p15, 0, r2, c13, c0, 1 @ read current context ID + lsr r2, r2, #8 @ extract the PID + bfi r1, r2, #8, #24 @ insert into new context ID +#endif #ifdef CONFIG_ARM_ERRATA_754322 dsb #endif -- cgit v1.2.3 From 46c87852e99cf8ce97e207b11cde19085837e39c Mon Sep 17 00:00:00 2001 From: Prathyush K Date: Mon, 16 Jul 2012 08:59:55 +0200 Subject: ARM: dma-mapping: modify condition check while freeing pages WARNING: at mm/vmalloc.c:1471 __iommu_free_buffer+0xcc/0xd0() Trying to vfree() nonexistent vm area (ef095000) Modules linked in: [] (unwind_backtrace+0x0/0xfc) from [] (warn_slowpath_common+0x54/0x64) [] (warn_slowpath_common+0x54/0x64) from [] (warn_slowpath_fmt+0x30/0x40) [] (warn_slowpath_fmt+0x30/0x40) from [] (__iommu_free_buffer+0xcc/0xd0) [] (__iommu_free_buffer+0xcc/0xd0) from [] (exynos_drm_free_buf+0xe4/0x138) [] (exynos_drm_free_buf+0xe4/0x138) from [] (exynos_drm_gem_destroy+0x80/0xfc) [] (exynos_drm_gem_destroy+0x80/0xfc) from [] (drm_gem_object_free+0x28/0x34) [] (drm_gem_object_free+0x28/0x34) from [] (drm_gem_object_release_handle+0xcc/0xd8) [] (drm_gem_object_release_handle+0xcc/0xd8) from [] (idr_for_each+0x74/0xb8) [] (idr_for_each+0x74/0xb8) from [] (drm_gem_release+0x1c/0x30) [] (drm_gem_release+0x1c/0x30) from [] (drm_release+0x608/0x694) [] (drm_release+0x608/0x694) from [] (fput+0xb8/0x228) [] (fput+0xb8/0x228) from [] (filp_close+0x64/0x84) [] (filp_close+0x64/0x84) from [] (put_files_struct+0xe8/0x104) [] (put_files_struct+0xe8/0x104) from [] (do_exit+0x608/0x774) [] (do_exit+0x608/0x774) from [] (do_group_exit+0x48/0xb4) [] (do_group_exit+0x48/0xb4) from [] (sys_exit_group+0x10/0x18) [] (sys_exit_group+0x10/0x18) from [] (ret_fast_syscall+0x0/0x30) This patch modifies the condition while freeing to match the condition used while allocation. This fixes the above warning which arises when array size is equal to PAGE_SIZE where allocation is done using kzalloc but free is done using vfree. 
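A simplified sketch of the pairing being restored (the helper names are invented for illustration; the real logic lives in __iommu_alloc_buffer()/__iommu_free_buffer()):

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

static struct page **alloc_pages_array(size_t array_size)
{
	/* Small arrays fit in one page and come from the slab allocator;
	 * larger ones come from vmalloc. */
	if (array_size <= PAGE_SIZE)
		return kzalloc(array_size, GFP_KERNEL);
	return vzalloc(array_size);
}

static void free_pages_array(struct page **pages, size_t array_size)
{
	/* Must mirror the allocation boundary exactly. */
	if (array_size <= PAGE_SIZE)
		kfree(pages);
	else
		vfree(pages);
}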
Signed-off-by: Prathyush K Signed-off-by: Marek Szyprowski --- arch/arm/mm/dma-mapping.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 4044abcf6f9d..655878bcc96d 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1091,7 +1091,7 @@ error: while (--i) if (pages[i]) __free_pages(pages[i], 0); - if (array_size < PAGE_SIZE) + if (array_size <= PAGE_SIZE) kfree(pages); else vfree(pages); @@ -1106,7 +1106,7 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t s for (i = 0; i < count; i++) if (pages[i]) __free_pages(pages[i], 0); - if (array_size < PAGE_SIZE) + if (array_size <= PAGE_SIZE) kfree(pages); else vfree(pages); -- cgit v1.2.3 From e9da6e9905e639b0f842a244bc770b48ad0523e9 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 30 Jul 2012 09:11:33 +0200 Subject: ARM: dma-mapping: remove custom consistent dma region This patch changes dma-mapping subsystem to use generic vmalloc areas for all consistent dma allocations. This increases the total size limit of the consistent allocations and removes platform hacks and a lot of duplicated code. Atomic allocations are served from special pool preallocated on boot, because vmalloc areas cannot be reliably created in atomic context. Signed-off-by: Marek Szyprowski Reviewed-by: Kyungmin Park Reviewed-by: Minchan Kim --- arch/arm/mm/dma-mapping.c | 486 ++++++++++++++++------------------------------ arch/arm/mm/mm.h | 3 + 2 files changed, 169 insertions(+), 320 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 655878bcc96d..f906d5f4cbd8 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -217,115 +218,70 @@ static void __dma_free_buffer(struct page *page, size_t size) } #ifdef CONFIG_MMU +#ifdef CONFIG_HUGETLB_PAGE +#error ARM Coherent DMA allocator does not (yet) support huge TLB +#endif -#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - consistent_base) >> PAGE_SHIFT) -#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - consistent_base) >> PMD_SHIFT) - -/* - * These are the page tables (2MB each) covering uncached, DMA consistent allocations - */ -static pte_t **consistent_pte; - -#define DEFAULT_CONSISTENT_DMA_SIZE SZ_2M +static void *__alloc_from_contiguous(struct device *dev, size_t size, + pgprot_t prot, struct page **ret_page); -static unsigned long consistent_base = CONSISTENT_END - DEFAULT_CONSISTENT_DMA_SIZE; +static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, + pgprot_t prot, struct page **ret_page, + const void *caller); -void __init init_consistent_dma_size(unsigned long size) +static void * +__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, + const void *caller) { - unsigned long base = CONSISTENT_END - ALIGN(size, SZ_2M); + struct vm_struct *area; + unsigned long addr; - BUG_ON(consistent_pte); /* Check we're called before DMA region init */ - BUG_ON(base < VMALLOC_END); + /* + * DMA allocation can be mapped to user space, so lets + * set VM_USERMAP flags too. 
+ */ + area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, + caller); + if (!area) + return NULL; + addr = (unsigned long)area->addr; + area->phys_addr = __pfn_to_phys(page_to_pfn(page)); - /* Grow region to accommodate specified size */ - if (base < consistent_base) - consistent_base = base; + if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) { + vunmap((void *)addr); + return NULL; + } + return (void *)addr; } -#include "vmregion.h" - -static struct arm_vmregion_head consistent_head = { - .vm_lock = __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock), - .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), - .vm_end = CONSISTENT_END, -}; - -#ifdef CONFIG_HUGETLB_PAGE -#error ARM Coherent DMA allocator does not (yet) support huge TLB -#endif - -/* - * Initialise the consistent memory allocation. - */ -static int __init consistent_init(void) +static void __dma_free_remap(void *cpu_addr, size_t size) { - int ret = 0; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - int i = 0; - unsigned long base = consistent_base; - unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT; - - if (IS_ENABLED(CONFIG_CMA) && !IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) - return 0; - - consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL); - if (!consistent_pte) { - pr_err("%s: no memory\n", __func__); - return -ENOMEM; + unsigned int flags = VM_ARM_DMA_CONSISTENT | VM_USERMAP; + struct vm_struct *area = find_vm_area(cpu_addr); + if (!area || (area->flags & flags) != flags) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + return; } - - pr_debug("DMA memory: 0x%08lx - 0x%08lx:\n", base, CONSISTENT_END); - consistent_head.vm_start = base; - - do { - pgd = pgd_offset(&init_mm, base); - - pud = pud_alloc(&init_mm, pgd, base); - if (!pud) { - pr_err("%s: no pud tables\n", __func__); - ret = -ENOMEM; - break; - } - - pmd = pmd_alloc(&init_mm, pud, base); - if (!pmd) { - pr_err("%s: no pmd tables\n", __func__); - ret = -ENOMEM; - break; - } - WARN_ON(!pmd_none(*pmd)); - - pte = pte_alloc_kernel(pmd, base); - if (!pte) { - pr_err("%s: no pte tables\n", __func__); - ret = -ENOMEM; - break; - } - - consistent_pte[i++] = pte; - base += PMD_SIZE; - } while (base < CONSISTENT_END); - - return ret; + unmap_kernel_range((unsigned long)cpu_addr, size); + vunmap(cpu_addr); } -core_initcall(consistent_init); - -static void *__alloc_from_contiguous(struct device *dev, size_t size, - pgprot_t prot, struct page **ret_page); -static struct arm_vmregion_head coherent_head = { - .vm_lock = __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock), - .vm_list = LIST_HEAD_INIT(coherent_head.vm_list), +struct dma_pool { + size_t size; + spinlock_t lock; + unsigned long *bitmap; + unsigned long nr_pages; + void *vaddr; + struct page *page; }; -static size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8; +static struct dma_pool atomic_pool = { + .size = SZ_256K, +}; static int __init early_coherent_pool(char *p) { - coherent_pool_size = memparse(p, &p); + atomic_pool.size = memparse(p, &p); return 0; } early_param("coherent_pool", early_coherent_pool); @@ -333,32 +289,45 @@ early_param("coherent_pool", early_coherent_pool); /* * Initialise the coherent pool for atomic allocations. 
*/ -static int __init coherent_init(void) +static int __init atomic_pool_init(void) { + struct dma_pool *pool = &atomic_pool; pgprot_t prot = pgprot_dmacoherent(pgprot_kernel); - size_t size = coherent_pool_size; + unsigned long nr_pages = pool->size >> PAGE_SHIFT; + unsigned long *bitmap; struct page *page; void *ptr; + int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long); - if (!IS_ENABLED(CONFIG_CMA)) - return 0; + bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!bitmap) + goto no_bitmap; - ptr = __alloc_from_contiguous(NULL, size, prot, &page); + if (IS_ENABLED(CONFIG_CMA)) + ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page); + else + ptr = __alloc_remap_buffer(NULL, pool->size, GFP_KERNEL, prot, + &page, NULL); if (ptr) { - coherent_head.vm_start = (unsigned long) ptr; - coherent_head.vm_end = (unsigned long) ptr + size; - printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n", - (unsigned)size / 1024); + spin_lock_init(&pool->lock); + pool->vaddr = ptr; + pool->page = page; + pool->bitmap = bitmap; + pool->nr_pages = nr_pages; + pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n", + (unsigned)pool->size / 1024); return 0; } - printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", - (unsigned)size / 1024); + kfree(bitmap); +no_bitmap: + pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", + (unsigned)pool->size / 1024); return -ENOMEM; } /* * CMA is activated by core_initcall, so we must be called after it. */ -postcore_initcall(coherent_init); +postcore_initcall(atomic_pool_init); struct dma_contig_early_reserve { phys_addr_t base; @@ -406,112 +375,6 @@ void __init dma_contiguous_remap(void) } } -static void * -__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, - const void *caller) -{ - struct arm_vmregion *c; - size_t align; - int bit; - - if (!consistent_pte) { - pr_err("%s: not initialised\n", __func__); - dump_stack(); - return NULL; - } - - /* - * Align the virtual region allocation - maximum alignment is - * a section size, minimum is a page size. This helps reduce - * fragmentation of the DMA space, and also prevents allocations - * smaller than a section from crossing a section boundary. - */ - bit = fls(size - 1); - if (bit > SECTION_SHIFT) - bit = SECTION_SHIFT; - align = 1 << bit; - - /* - * Allocate a virtual address in the consistent mapping region. 
- */ - c = arm_vmregion_alloc(&consistent_head, align, size, - gfp & ~(__GFP_DMA | __GFP_HIGHMEM), caller); - if (c) { - pte_t *pte; - int idx = CONSISTENT_PTE_INDEX(c->vm_start); - u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); - - pte = consistent_pte[idx] + off; - c->priv = page; - - do { - BUG_ON(!pte_none(*pte)); - - set_pte_ext(pte, mk_pte(page, prot), 0); - page++; - pte++; - off++; - if (off >= PTRS_PER_PTE) { - off = 0; - pte = consistent_pte[++idx]; - } - } while (size -= PAGE_SIZE); - - dsb(); - - return (void *)c->vm_start; - } - return NULL; -} - -static void __dma_free_remap(void *cpu_addr, size_t size) -{ - struct arm_vmregion *c; - unsigned long addr; - pte_t *ptep; - int idx; - u32 off; - - c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); - if (!c) { - pr_err("%s: trying to free invalid coherent area: %p\n", - __func__, cpu_addr); - dump_stack(); - return; - } - - if ((c->vm_end - c->vm_start) != size) { - pr_err("%s: freeing wrong coherent size (%ld != %d)\n", - __func__, c->vm_end - c->vm_start, size); - dump_stack(); - size = c->vm_end - c->vm_start; - } - - idx = CONSISTENT_PTE_INDEX(c->vm_start); - off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); - ptep = consistent_pte[idx] + off; - addr = c->vm_start; - do { - pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); - - ptep++; - addr += PAGE_SIZE; - off++; - if (off >= PTRS_PER_PTE) { - off = 0; - ptep = consistent_pte[++idx]; - } - - if (pte_none(pte) || !pte_present(pte)) - pr_crit("%s: bad page in kernel page table\n", - __func__); - } while (size -= PAGE_SIZE); - - flush_tlb_kernel_range(c->vm_start, c->vm_end); - - arm_vmregion_free(&consistent_head, c); -} - static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr, void *data) { @@ -552,16 +415,17 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, return ptr; } -static void *__alloc_from_pool(struct device *dev, size_t size, - struct page **ret_page, const void *caller) +static void *__alloc_from_pool(size_t size, struct page **ret_page) { - struct arm_vmregion *c; + struct dma_pool *pool = &atomic_pool; + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned int pageno; + unsigned long flags; + void *ptr = NULL; size_t align; - if (!coherent_head.vm_start) { - printk(KERN_ERR "%s: coherent pool not initialised!\n", - __func__); - dump_stack(); + if (!pool->vaddr) { + WARN(1, "coherent pool not initialised!\n"); return NULL; } @@ -571,35 +435,41 @@ static void *__alloc_from_pool(struct device *dev, size_t size, * size. This helps reduce fragmentation of the DMA space. 
*/ align = PAGE_SIZE << get_order(size); - c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller); - if (c) { - void *ptr = (void *)c->vm_start; - struct page *page = virt_to_page(ptr); - *ret_page = page; - return ptr; + + spin_lock_irqsave(&pool->lock, flags); + pageno = bitmap_find_next_zero_area(pool->bitmap, pool->nr_pages, + 0, count, (1 << align) - 1); + if (pageno < pool->nr_pages) { + bitmap_set(pool->bitmap, pageno, count); + ptr = pool->vaddr + PAGE_SIZE * pageno; + *ret_page = pool->page + pageno; } - return NULL; + spin_unlock_irqrestore(&pool->lock, flags); + + return ptr; } -static int __free_from_pool(void *cpu_addr, size_t size) +static int __free_from_pool(void *start, size_t size) { - unsigned long start = (unsigned long)cpu_addr; - unsigned long end = start + size; - struct arm_vmregion *c; + struct dma_pool *pool = &atomic_pool; + unsigned long pageno, count; + unsigned long flags; - if (start < coherent_head.vm_start || end > coherent_head.vm_end) + if (start < pool->vaddr || start > pool->vaddr + pool->size) return 0; - c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start); - - if ((c->vm_end - c->vm_start) != size) { - printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", - __func__, c->vm_end - c->vm_start, size); - dump_stack(); - size = c->vm_end - c->vm_start; + if (start + size > pool->vaddr + pool->size) { + WARN(1, "freeing wrong coherent size from pool\n"); + return 0; } - arm_vmregion_free(&coherent_head, c); + pageno = (start - pool->vaddr) >> PAGE_SHIFT; + count = size >> PAGE_SHIFT; + + spin_lock_irqsave(&pool->lock, flags); + bitmap_clear(pool->bitmap, pageno, count); + spin_unlock_irqrestore(&pool->lock, flags); + return 1; } @@ -644,7 +514,7 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot) #define __get_dma_pgprot(attrs, prot) __pgprot(0) #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL -#define __alloc_from_pool(dev, size, ret_page, c) NULL +#define __alloc_from_pool(size, ret_page) NULL #define __alloc_from_contiguous(dev, size, prot, ret) NULL #define __free_from_pool(cpu_addr, size) 0 #define __free_from_contiguous(dev, page, size) do { } while (0) @@ -702,10 +572,10 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, if (arch_is_coherent() || nommu()) addr = __alloc_simple_buffer(dev, size, gfp, &page); + else if (gfp & GFP_ATOMIC) + addr = __alloc_from_pool(size, &page); else if (!IS_ENABLED(CONFIG_CMA)) addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); - else if (gfp & GFP_ATOMIC) - addr = __alloc_from_pool(dev, size, &page, caller); else addr = __alloc_from_contiguous(dev, size, prot, &page); @@ -998,9 +868,6 @@ static int arm_dma_set_mask(struct device *dev, u64 dma_mask) static int __init dma_debug_do_init(void) { -#ifdef CONFIG_MMU - arm_vmregion_create_proc("dma-mappings", &consistent_head); -#endif dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); return 0; } @@ -1117,61 +984,32 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t s * Create a CPU mapping for a specified pages */ static void * -__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot) +__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot, + const void *caller) { - struct arm_vmregion *c; - size_t align; - size_t count = size >> PAGE_SHIFT; - int bit; + unsigned int i, nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct vm_struct *area; + unsigned long p; - if 
(!consistent_pte[0]) { - pr_err("%s: not initialised\n", __func__); - dump_stack(); + area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, + caller); + if (!area) return NULL; - } - /* - * Align the virtual region allocation - maximum alignment is - * a section size, minimum is a page size. This helps reduce - * fragmentation of the DMA space, and also prevents allocations - * smaller than a section from crossing a section boundary. - */ - bit = fls(size - 1); - if (bit > SECTION_SHIFT) - bit = SECTION_SHIFT; - align = 1 << bit; - - /* - * Allocate a virtual address in the consistent mapping region. - */ - c = arm_vmregion_alloc(&consistent_head, align, size, - gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL); - if (c) { - pte_t *pte; - int idx = CONSISTENT_PTE_INDEX(c->vm_start); - int i = 0; - u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); - - pte = consistent_pte[idx] + off; - c->priv = pages; - - do { - BUG_ON(!pte_none(*pte)); - - set_pte_ext(pte, mk_pte(pages[i], prot), 0); - pte++; - off++; - i++; - if (off >= PTRS_PER_PTE) { - off = 0; - pte = consistent_pte[++idx]; - } - } while (i < count); - - dsb(); + area->pages = pages; + area->nr_pages = nr_pages; + p = (unsigned long)area->addr; - return (void *)c->vm_start; + for (i = 0; i < nr_pages; i++) { + phys_addr_t phys = __pfn_to_phys(page_to_pfn(pages[i])); + if (ioremap_page_range(p, p + PAGE_SIZE, phys, prot)) + goto err; + p += PAGE_SIZE; } + return area->addr; +err: + unmap_kernel_range((unsigned long)area->addr, size); + vunmap(area->addr); return NULL; } @@ -1230,6 +1068,16 @@ static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t si return 0; } +static struct page **__iommu_get_pages(void *cpu_addr) +{ + struct vm_struct *area; + + area = find_vm_area(cpu_addr); + if (area && (area->flags & VM_ARM_DMA_CONSISTENT)) + return area->pages; + return NULL; +} + static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) { @@ -1248,7 +1096,8 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, if (*handle == DMA_ERROR_CODE) goto err_buffer; - addr = __iommu_alloc_remap(pages, size, gfp, prot); + addr = __iommu_alloc_remap(pages, size, gfp, prot, + __builtin_return_address(0)); if (!addr) goto err_mapping; @@ -1265,31 +1114,25 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs) { - struct arm_vmregion *c; + unsigned long uaddr = vma->vm_start; + unsigned long usize = vma->vm_end - vma->vm_start; + struct page **pages = __iommu_get_pages(cpu_addr); vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); - c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); - if (c) { - struct page **pages = c->priv; - - unsigned long uaddr = vma->vm_start; - unsigned long usize = vma->vm_end - vma->vm_start; - int i = 0; - - do { - int ret; + if (!pages) + return -ENXIO; - ret = vm_insert_page(vma, uaddr, pages[i++]); - if (ret) { - pr_err("Remapping memory, error: %d\n", ret); - return ret; - } + do { + int ret = vm_insert_page(vma, uaddr, *pages++); + if (ret) { + pr_err("Remapping memory failed: %d\n", ret); + return ret; + } + uaddr += PAGE_SIZE; + usize -= PAGE_SIZE; + } while (usize > 0); - uaddr += PAGE_SIZE; - usize -= PAGE_SIZE; - } while (usize > 0); - } return 0; } @@ -1300,16 +1143,19 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void 
arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs) { - struct arm_vmregion *c; + struct page **pages = __iommu_get_pages(cpu_addr); size = PAGE_ALIGN(size); - c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); - if (c) { - struct page **pages = c->priv; - __dma_free_remap(cpu_addr, size); - __iommu_remove_mapping(dev, handle, size); - __iommu_free_buffer(dev, pages, size); + if (!pages) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + return; } + + unmap_kernel_range((unsigned long)cpu_addr, size); + vunmap(cpu_addr); + + __iommu_remove_mapping(dev, handle, size); + __iommu_free_buffer(dev, pages, size); } /* diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 2e8a1efdf7b8..6776160618ef 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -59,6 +59,9 @@ extern void __flush_dcache_page(struct address_space *mapping, struct page *page #define VM_ARM_MTYPE(mt) ((mt) << 20) #define VM_ARM_MTYPE_MASK (0x1f << 20) +/* consistent regions used by dma_alloc_attrs() */ +#define VM_ARM_DMA_CONSISTENT 0x20000000 + #endif #ifdef CONFIG_ZONE_DMA -- cgit v1.2.3 From 50262a4bf38dd70486e9fce2b8235d5ae3e0f627 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 30 Jul 2012 09:35:26 +0200 Subject: ARM: dma-mapping: add more sanity checks in arm_dma_mmap() Add some sanity checks and forbid mmaping of buffers into vma areas larger than allocated dma buffer. Signed-off-by: Marek Szyprowski --- arch/arm/mm/dma-mapping.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index f906d5f4cbd8..a2881c98fb03 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -611,16 +611,22 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, { int ret = -ENXIO; #ifdef CONFIG_MMU + unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned long pfn = dma_to_pfn(dev, dma_addr); + unsigned long off = vma->vm_pgoff; + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) return ret; - ret = remap_pfn_range(vma, vma->vm_start, - pfn + vma->vm_pgoff, - vma->vm_end - vma->vm_start, - vma->vm_page_prot); + if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { + ret = remap_pfn_range(vma, vma->vm_start, + pfn + off, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); + } #endif /* CONFIG_MMU */ return ret; -- cgit v1.2.3 From 9fa8af91f0679f2abbebe1382b937264f3a8b981 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 27 Jul 2012 17:12:50 +0200 Subject: ARM: dma-mapping: fix error path for memory allocation failure This patch fixes incorrect check in error path. When the allocation of first page fails, the kernel ops appears due to accessing -1 element of the pages array. 
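Shown as a standalone sketch (hypothetical helper, not the patch itself), the corrected unwind idiom is:

/* Undo a partially completed allocation that failed at index i.
 * 'while (i--)' visits i-1 down to 0 and then stops; the old
 * 'while (--i)' skipped pages[0] and, when i == 0, walked into pages[-1]. */
static void unwind_partial_alloc(struct page **pages, int i)
{
	while (i--)
		if (pages[i])
			__free_pages(pages[i], 0);
}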
Reported-by: Sylwester Nawrocki Signed-off-by: Marek Szyprowski --- arch/arm/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index a2881c98fb03..cbdeaf4a3392 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -961,7 +961,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t return pages; error: - while (--i) + while (i--) if (pages[i]) __free_pages(pages[i], 0); if (array_size <= PAGE_SIZE) -- cgit v1.2.3 From 955c757e090f41188764a0e488ba7036f7dbb215 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 16 May 2012 19:38:58 +0100 Subject: ARM: dma-mapping: add support for DMA_ATTR_NO_KERNEL_MAPPING attribute This patch adds support for DMA_ATTR_NO_KERNEL_MAPPING attribute for IOMMU allocations, what let drivers to save precious kernel virtual address space for large buffers that are intended to be accessed only from userspace. This patch is heavily based on initial work kindly provided by Abhinav Kochhar . Signed-off-by: Marek Szyprowski Reviewed-by: Kyungmin Park --- arch/arm/mm/dma-mapping.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index cbdeaf4a3392..8e505b9df5c7 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1074,10 +1074,13 @@ static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t si return 0; } -static struct page **__iommu_get_pages(void *cpu_addr) +static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs) { struct vm_struct *area; + if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) + return cpu_addr; + area = find_vm_area(cpu_addr); if (area && (area->flags & VM_ARM_DMA_CONSISTENT)) return area->pages; @@ -1102,6 +1105,9 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, if (*handle == DMA_ERROR_CODE) goto err_buffer; + if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) + return pages; + addr = __iommu_alloc_remap(pages, size, gfp, prot, __builtin_return_address(0)); if (!addr) @@ -1122,7 +1128,7 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, { unsigned long uaddr = vma->vm_start; unsigned long usize = vma->vm_end - vma->vm_start; - struct page **pages = __iommu_get_pages(cpu_addr); + struct page **pages = __iommu_get_pages(cpu_addr, attrs); vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); @@ -1149,7 +1155,7 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs) { - struct page **pages = __iommu_get_pages(cpu_addr); + struct page **pages = __iommu_get_pages(cpu_addr, attrs); size = PAGE_ALIGN(size); if (!pages) { @@ -1157,8 +1163,10 @@ void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, return; } - unmap_kernel_range((unsigned long)cpu_addr, size); - vunmap(cpu_addr); + if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) { + unmap_kernel_range((unsigned long)cpu_addr, size); + vunmap(cpu_addr); + } __iommu_remove_mapping(dev, handle, size); __iommu_free_buffer(dev, pages, size); -- cgit v1.2.3 From dc2832e1e7db3f9ad465d2fe894bd69ef05d1e4b Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 13 Jun 2012 10:01:15 +0200 Subject: ARM: dma-mapping: add support for 
dma_get_sgtable() This patch adds support for dma_get_sgtable() function which is required to let drivers to share the buffers allocated by DMA-mapping subsystem. Generic implementation based on virt_to_page() is not suitable for ARM dma-mapping subsystem. Signed-off-by: Marek Szyprowski Reviewed-by: Kyungmin Park --- arch/arm/mm/dma-mapping.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 8e505b9df5c7..51278ede2028 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -125,6 +125,7 @@ struct dma_map_ops arm_dma_ops = { .alloc = arm_dma_alloc, .free = arm_dma_free, .mmap = arm_dma_mmap, + .get_sgtable = arm_dma_get_sgtable, .map_page = arm_dma_map_page, .unmap_page = arm_dma_unmap_page, .map_sg = arm_dma_map_sg, @@ -661,6 +662,21 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, } } +int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t handle, size_t size, + struct dma_attrs *attrs) +{ + struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); + int ret; + + ret = sg_alloc_table(sgt, 1, GFP_KERNEL); + if (unlikely(ret)) + return ret; + + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); + return 0; +} + static void dma_cache_maint_page(struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, void (*op)(const void *, size_t, int)) @@ -1172,6 +1188,20 @@ void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, __iommu_free_buffer(dev, pages, size); } +static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, + size_t size, struct dma_attrs *attrs) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct page **pages = __iommu_get_pages(cpu_addr, attrs); + + if (!pages) + return -ENXIO; + + return sg_alloc_table_from_pages(sgt, pages, count, 0, size, + GFP_KERNEL); +} + /* * Map a part of the scatter-gather list into contiguous io address space */ @@ -1431,6 +1461,7 @@ struct dma_map_ops iommu_ops = { .alloc = arm_iommu_alloc_attrs, .free = arm_iommu_free_attrs, .mmap = arm_iommu_mmap_attrs, + .get_sgtable = arm_iommu_get_sgtable, .map_page = arm_iommu_map_page, .unmap_page = arm_iommu_unmap_page, -- cgit v1.2.3 From 97ef952a20853fad72087a53fa556fbec45edd8f Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 6 Jun 2012 14:50:56 +0200 Subject: ARM: dma-mapping: add support for DMA_ATTR_SKIP_CPU_SYNC attribute This patch adds support for DMA_ATTR_SKIP_CPU_SYNC attribute for dma_(un)map_(single,page,sg) functions family. It lets dma mapping clients to create a mapping for the buffer for the given device without performing a CPU cache synchronization. CPU cache synchronization can be skipped for the buffers which it is known that they are already in 'device' domain (CPU caches have been already synchronized or there are only coherent mappings for the buffer). For advanced users only, please use it with care. 
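A minimal usage sketch, assuming the driver already holds the buffer in the device domain (the helper and its arguments are placeholders, not taken from this patch):

#include <linux/dma-mapping.h>

/* Placeholder helper: map an sg list that is already in the 'device'
 * domain, so the CPU cache maintenance can safely be skipped. */
static int map_device_owned_sg(struct device *dev, struct scatterlist *sgl,
			       int nents)
{
	DEFINE_DMA_ATTRS(attrs);

	dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs);
	return dma_map_sg_attrs(dev, sgl, nents, DMA_TO_DEVICE, &attrs);
}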
Signed-off-by: Marek Szyprowski Reviewed-by: Kyungmin Park --- arch/arm/mm/dma-mapping.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 51278ede2028..b30ba0a1fca3 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -73,7 +73,7 @@ static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - if (!arch_is_coherent()) + if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_cpu_to_dev(page, offset, size, dir); return pfn_to_dma(dev, page_to_pfn(page)) + offset; } @@ -96,7 +96,7 @@ static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - if (!arch_is_coherent()) + if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), handle & ~PAGE_MASK, size, dir); } @@ -1207,7 +1207,7 @@ static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt, */ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, size_t size, dma_addr_t *handle, - enum dma_data_direction dir) + enum dma_data_direction dir, struct dma_attrs *attrs) { struct dma_iommu_mapping *mapping = dev->archdata.mapping; dma_addr_t iova, iova_base; @@ -1226,7 +1226,8 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, phys_addr_t phys = page_to_phys(sg_page(s)); unsigned int len = PAGE_ALIGN(s->offset + s->length); - if (!arch_is_coherent()) + if (!arch_is_coherent() && + !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); ret = iommu_map(mapping->domain, iova, phys, len, 0); @@ -1273,7 +1274,7 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { if (__map_sg_chunk(dev, start, size, &dma->dma_address, - dir) < 0) + dir, attrs) < 0) goto bad_mapping; dma->dma_address += offset; @@ -1286,7 +1287,7 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, } size += s->length; } - if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0) + if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs) < 0) goto bad_mapping; dma->dma_address += offset; @@ -1320,7 +1321,8 @@ void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, if (sg_dma_len(s)) __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s)); - if (!arch_is_coherent()) + if (!arch_is_coherent() && + !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); } @@ -1382,7 +1384,7 @@ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, dma_addr_t dma_addr; int ret, len = PAGE_ALIGN(size + offset); - if (!arch_is_coherent()) + if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_cpu_to_dev(page, offset, size, dir); dma_addr = __alloc_iova(mapping, len); @@ -1421,7 +1423,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, if (!iova) return; - if (!arch_is_coherent()) + if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_dev_to_cpu(page, offset, size, dir); iommu_unmap(mapping->domain, iova, len); -- cgit v1.2.3 From 
5a783cbc48367cfc7b65afc75430953dfe60098f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 20 Jul 2012 18:24:55 +0100 Subject: ARM: 7478/1: errata: extend workaround for erratum #720789 Commit cdf357f1 ("ARM: 6299/1: errata: TLBIASIDIS and TLBIMVAIS operations can broadcast a faulty ASID") replaced by-ASID TLB flushing operations with all-ASID variants to workaround A9 erratum #720789. This patch extends the workaround to include the tlb_range operations, which were overlooked by the original patch. Cc: Tested-by: Steve Capper Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/mm/tlb-v7.S | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S index 845f461f8ec1..c2021139cb56 100644 --- a/arch/arm/mm/tlb-v7.S +++ b/arch/arm/mm/tlb-v7.S @@ -38,11 +38,19 @@ ENTRY(v7wbi_flush_user_tlb_range) dsb mov r0, r0, lsr #PAGE_SHIFT @ align address mov r1, r1, lsr #PAGE_SHIFT +#ifdef CONFIG_ARM_ERRATA_720789 + mov r3, #0 +#else asid r3, r3 @ mask ASID +#endif orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA mov r1, r1, lsl #PAGE_SHIFT 1: +#ifdef CONFIG_ARM_ERRATA_720789 + ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable) +#else ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) +#endif ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA add r0, r0, #PAGE_SZ @@ -67,7 +75,11 @@ ENTRY(v7wbi_flush_kern_tlb_range) mov r0, r0, lsl #PAGE_SHIFT mov r1, r1, lsl #PAGE_SHIFT 1: +#ifdef CONFIG_ARM_ERRATA_720789 + ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable) +#else ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) +#endif ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA add r0, r0, #PAGE_SZ cmp r0, r1 -- cgit v1.2.3 From 39f78e70567a07a6fc0d7a4ca9e3331e44dd400d Mon Sep 17 00:00:00 2001 From: Chris Brand Date: Tue, 7 Aug 2012 14:01:14 +0200 Subject: ARM: mm: fix MMU mapping of CMA regions Fix dma_contiguous_remap() so that it continues through all the regions, even after encountering one that is outside lowmem. Without this change, if you have two CMA regions, the first outside lowmem and the seocnd inside lowmem, only the second one will get set up in the MMU. Data written to that region then doesn't get automatically flushed from the cache into memory. Signed-off-by: Chris Brand [extended patch subject with 'fix' word] Signed-off-by: Marek Szyprowski --- arch/arm/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index c2cdf6500f75..334dd79ad5e6 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -358,7 +358,7 @@ void __init dma_contiguous_remap(void) if (end > arm_lowmem_limit) end = arm_lowmem_limit; if (start >= end) - return; + continue; map.pfn = __phys_to_pfn(start); map.virtual = __phys_to_virt(start); -- cgit v1.2.3 From e4ea6918c93b9f59d34e8ca2124b2b64b1afe73b Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Tue, 7 Aug 2012 14:39:25 +0200 Subject: ARM: dma-mapping: fix atomic allocation alignment The alignment mask is calculated incorrectly. Fixing the calculation makes strange hangs/lockups disappear during the boot with Amstrad E3 and 3.6-rc1 kernel. 
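A worked example of the two calculations (illustrative values only, not from the patch):

/* For a 16 KiB request with 4 KiB pages, get_order(SZ_16K) == 2, so the
 * allocation should start on a 4-page boundary within the pool bitmap:
 *
 *	align_mask = (1 << get_order(size)) - 1;     -> 0x3  (correct)
 *
 * The old code built align = PAGE_SIZE << get_order(size) = 16384 and then
 * passed (1 << align) - 1 to bitmap_find_next_zero_area(), an undefined
 * shift that produced a bogus mask. */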
Signed-off-by: Aaro Koskinen Signed-off-by: Marek Szyprowski --- arch/arm/mm/dma-mapping.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 334dd79ad5e6..4bdeccd24d93 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -423,7 +423,7 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page) unsigned int pageno; unsigned long flags; void *ptr = NULL; - size_t align; + unsigned long align_mask; if (!pool->vaddr) { WARN(1, "coherent pool not initialised!\n"); @@ -435,11 +435,11 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page) * small, so align them to their order in pages, minimum is a page * size. This helps reduce fragmentation of the DMA space. */ - align = PAGE_SIZE << get_order(size); + align_mask = (1 << get_order(size)) - 1; spin_lock_irqsave(&pool->lock, flags); pageno = bitmap_find_next_zero_area(pool->bitmap, pool->nr_pages, - 0, count, (1 << align) - 1); + 0, count, align_mask); if (pageno < pool->nr_pages) { bitmap_set(pool->bitmap, pageno, count); ptr = pool->vaddr + PAGE_SIZE * pageno; -- cgit v1.2.3 From d9e0d149b5dcc2ef4688afc572b9906bcda941ef Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Tue, 7 Aug 2012 14:44:05 +0200 Subject: ARM: dma-mapping: fix incorrect freeing of atomic allocations Commit e9da6e9905e639b0f842a244bc770b48ad0523e9 (ARM: dma-mapping: remove custom consistent dma region) changed the way atomic allocations are handled. However, arm_dma_free() was not modified accordingly, and as a result freeing of atomic allocations does not work correctly when CMA is disabled. Memory is leaked and following WARNINGs are seen: [ 57.698911] ------------[ cut here ]------------ [ 57.753518] WARNING: at arch/arm/mm/dma-mapping.c:263 arm_dma_free+0x88/0xe4() [ 57.811473] trying to free invalid coherent area: e0848000 [ 57.867398] Modules linked in: sata_mv(-) [ 57.921373] [] (unwind_backtrace+0x0/0xf0) from [] (warn_slowpath_common+0x50/0x68) [ 58.033924] [] (warn_slowpath_common+0x50/0x68) from [] (warn_slowpath_fmt+0x30/0x40) [ 58.152024] [] (warn_slowpath_fmt+0x30/0x40) from [] (arm_dma_free+0x88/0xe4) [ 58.219592] [] (arm_dma_free+0x88/0xe4) from [] (dma_pool_destroy+0x100/0x148) [ 58.345526] [] (dma_pool_destroy+0x100/0x148) from [] (release_nodes+0x144/0x218) [ 58.475782] [] (release_nodes+0x144/0x218) from [] (__device_release_driver+0x60/0xb8) [ 58.614260] [] (__device_release_driver+0x60/0xb8) from [] (driver_detach+0xd8/0xec) [ 58.756527] [] (driver_detach+0xd8/0xec) from [] (bus_remove_driver+0x7c/0xc4) [ 58.901648] [] (bus_remove_driver+0x7c/0xc4) from [] (sys_delete_module+0x19c/0x220) [ 59.051447] [] (sys_delete_module+0x19c/0x220) from [] (ret_fast_syscall+0x0/0x2c) [ 59.207996] ---[ end trace 0745420412c0325a ]--- [ 59.287110] ------------[ cut here ]------------ [ 59.366324] WARNING: at arch/arm/mm/dma-mapping.c:263 arm_dma_free+0x88/0xe4() [ 59.450511] trying to free invalid coherent area: e0847000 [ 59.534357] Modules linked in: sata_mv(-) [ 59.616785] [] (unwind_backtrace+0x0/0xf0) from [] (warn_slowpath_common+0x50/0x68) [ 59.790030] [] (warn_slowpath_common+0x50/0x68) from [] (warn_slowpath_fmt+0x30/0x40) [ 59.972322] [] (warn_slowpath_fmt+0x30/0x40) from [] (arm_dma_free+0x88/0xe4) [ 60.070701] [] (arm_dma_free+0x88/0xe4) from [] (dma_pool_destroy+0x100/0x148) [ 60.256817] [] (dma_pool_destroy+0x100/0x148) from [] (release_nodes+0x144/0x218) [ 60.445201] [] (release_nodes+0x144/0x218) 
from [] (__device_release_driver+0x60/0xb8) [ 60.634148] [] (__device_release_driver+0x60/0xb8) from [] (driver_detach+0xd8/0xec) [ 60.823623] [] (driver_detach+0xd8/0xec) from [] (bus_remove_driver+0x7c/0xc4) [ 61.013268] [] (bus_remove_driver+0x7c/0xc4) from [] (sys_delete_module+0x19c/0x220) [ 61.203472] [] (sys_delete_module+0x19c/0x220) from [] (ret_fast_syscall+0x0/0x2c) [ 61.393390] ---[ end trace 0745420412c0325b ]--- The patch fixes this. Signed-off-by: Aaro Koskinen Signed-off-by: Marek Szyprowski --- arch/arm/mm/dma-mapping.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 4bdeccd24d93..4e7d1182e8a3 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -648,12 +648,12 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, if (arch_is_coherent() || nommu()) { __dma_free_buffer(page, size); + } else if (__free_from_pool(cpu_addr, size)) { + return; } else if (!IS_ENABLED(CONFIG_CMA)) { __dma_free_remap(cpu_addr, size); __dma_free_buffer(page, size); } else { - if (__free_from_pool(cpu_addr, size)) - return; /* * Non-atomic allocations cannot be freed with IRQs disabled */ -- cgit v1.2.3 From 47f1204329237a0f8655f5a9f14a38ac81946ca1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 10 Aug 2012 17:51:18 +0100 Subject: ARM: 7487/1: mm: avoid setting nG bit for user mappings that aren't present Swap entries are encoding in ptes such that !pte_present(pte) and pte_file(pte). The remaining bits of the descriptor are used to identify the swapfile and offset within it to the swap entry. When writing such a pte for a user virtual address, set_pte_at unconditionally sets the nG bit, which (in the case of LPAE) will corrupt the swapfile offset and lead to a BUG: [ 140.494067] swap_free: Unused swap offset entry 000763b4 [ 140.509989] BUG: Bad page map in process rs:main Q:Reg pte:0ec76800 pmd:8f92e003 This patch fixes the problem by only setting the nG bit for user mappings that are actually present. Cc: Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/mm/flush.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 77458548e031..40ca11ed6e5f 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -231,8 +231,6 @@ void __sync_icache_dcache(pte_t pteval) struct page *page; struct address_space *mapping; - if (!pte_present_user(pteval)) - return; if (cache_is_vipt_nonaliasing() && !pte_exec(pteval)) /* only flush non-aliasing VIPT caches for exec mappings */ return; -- cgit v1.2.3 From 730a8128cd8978467eb1cf546b11014acb57d433 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 10 Aug 2012 19:13:36 +0100 Subject: ARM: 7489/1: errata: fix workaround for erratum #720789 on UP systems Commit 5a783cbc4836 ("ARM: 7478/1: errata: extend workaround for erratum #720789") added workarounds for erratum #720789 to the range TLB invalidation functions with the observation that the erratum only affects SMP platforms. However, when running an SMP_ON_UP kernel on a uniprocessor platform we must take care to preserve the ASID as the workaround is not required. This patch ensures that we don't set the ASID to 0 when flushing the TLB on such a system, preserving the original behaviour with the workaround disabled. 
Cc: Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/mm/tlb-v7.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S index c2021139cb56..ea94765acf9a 100644 --- a/arch/arm/mm/tlb-v7.S +++ b/arch/arm/mm/tlb-v7.S @@ -38,10 +38,10 @@ ENTRY(v7wbi_flush_user_tlb_range) dsb mov r0, r0, lsr #PAGE_SHIFT @ align address mov r1, r1, lsr #PAGE_SHIFT -#ifdef CONFIG_ARM_ERRATA_720789 - mov r3, #0 -#else asid r3, r3 @ mask ASID +#ifdef CONFIG_ARM_ERRATA_720789 + ALT_SMP(W(mov) r3, #0 ) + ALT_UP(W(nop) ) #endif orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA mov r1, r1, lsl #PAGE_SHIFT -- cgit v1.2.3 From a849088aa1552b1a28eea3daff599ee22a734ae3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 25 Aug 2012 09:03:15 +0100 Subject: ARM: Fix ioremap() of address zero Murali Nalajala reports a regression that ioremapping address zero results in an oops dump: Unable to handle kernel paging request at virtual address fa200000 pgd = d4f80000 [fa200000] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Modules linked in: CPU: 0 Tainted: G W (3.4.0-g3b5f728-00009-g638207a #13) PC is at msm_pm_config_rst_vector_before_pc+0x8/0x30 LR is at msm_pm_boot_config_before_pc+0x18/0x20 pc : [] lr : [] psr: a0000093 sp : c0837ef0 ip : cfe00000 fp : 0000000d r10: da7efc17 r9 : 225c4278 r8 : 00000006 r7 : 0003c000 r6 : c085c824 r5 : 00000001 r4 : fa101000 r3 : fa200000 r2 : c095080c r1 : 002250fc r0 : 00000000 Flags: NzCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment kernel Control: 10c5387d Table: 25180059 DAC: 00000015 [] (msm_pm_config_rst_vector_before_pc+0x8/0x30) from [] (msm_pm_boot_config_before_pc+0x18/0x20) [] (msm_pm_boot_config_before_pc+0x18/0x20) from [] (msm_pm_power_collapse+0x410/0xb04) [] (msm_pm_power_collapse+0x410/0xb04) from [] (arch_idle+0x294/0x3e0) [] (arch_idle+0x294/0x3e0) from [] (default_idle+0x18/0x2c) [] (default_idle+0x18/0x2c) from [] (cpu_idle+0x90/0xe4) [] (cpu_idle+0x90/0xe4) from [] (rest_init+0x88/0xa0) [] (rest_init+0x88/0xa0) from [] (start_kernel+0x3a8/0x40c) Code: c0704256 e12fff1e e59f2020 e5923000 (e5930000) This is caused by the 'reserved' entries which we insert (see 19b52abe3c5d7 - ARM: 7438/1: fill possible PMD empty section gaps) which get matched for physical address zero. Resolve this by marking these reserved entries with a different flag. 
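Conceptually, lookups that walk the early vmlist can then skip the gap fillers; a hedged sketch of that check (hypothetical helper, not part of the patch):

static bool is_reusable_static_mapping(const struct vm_struct *vm)
{
	/* Gap fillers inserted by pmd_empty_section_gap() carry no physical
	 * backing and must never satisfy an ioremap() lookup. */
	if (vm->flags & VM_ARM_EMPTY_MAPPING)
		return false;
	return vm->flags & VM_ARM_STATIC_MAPPING;
}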
Cc: Tested-by: Murali Nalajala Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/mm/mm.h | 3 +++ arch/arm/mm/mmu.c | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 6776160618ef..a8ee92da3544 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -55,6 +55,9 @@ extern void __flush_dcache_page(struct address_space *mapping, struct page *page /* permanent static mappings from iotable_init() */ #define VM_ARM_STATIC_MAPPING 0x40000000 +/* empty mapping */ +#define VM_ARM_EMPTY_MAPPING 0x20000000 + /* mapping type (attributes) for permanent static mappings */ #define VM_ARM_MTYPE(mt) ((mt) << 20) #define VM_ARM_MTYPE_MASK (0x1f << 20) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4c2d0451e84a..eab94bc6f805 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -807,7 +807,7 @@ static void __init pmd_empty_section_gap(unsigned long addr) vm = early_alloc_aligned(sizeof(*vm), __alignof__(*vm)); vm->addr = (void *)addr; vm->size = SECTION_SIZE; - vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING; + vm->flags = VM_IOREMAP | VM_ARM_EMPTY_MAPPING; vm->caller = pmd_empty_section_gap; vm_area_add_early(vm); } @@ -820,7 +820,7 @@ static void __init fill_pmd_gaps(void) /* we're still single threaded hence no lock needed here */ for (vm = vmlist; vm; vm = vm->next) { - if (!(vm->flags & VM_ARM_STATIC_MAPPING)) + if (!(vm->flags & (VM_ARM_STATIC_MAPPING | VM_ARM_EMPTY_MAPPING))) continue; addr = (unsigned long)vm->addr; if (addr < next) -- cgit v1.2.3