diff options
-rw-r--r-- | arch/arm/mm/cache-v7.S | 55 | ||||
-rw-r--r-- | drivers/video/tegra/nvmap/nvmap_common.h | 36 | ||||
-rw-r--r-- | drivers/video/tegra/nvmap/nvmap_dev.c | 55 | ||||
-rw-r--r-- | drivers/video/tegra/nvmap/nvmap_handle.c | 21 | ||||
-rw-r--r-- | drivers/video/tegra/nvmap/nvmap_heap.c | 19 | ||||
-rw-r--r-- | drivers/video/tegra/nvmap/nvmap_ioctl.c | 124 |
6 files changed, 248 insertions, 62 deletions
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 37c8157e116e..c8c823953a34 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -18,27 +18,28 @@ #include "proc-macros.S" /* - * v7_flush_dcache_all() + * v7_op_dcache_all op * - * Flush the whole D-cache. + * op=c14, Flush the whole D-cache. + * op=c10, Clean the whole D-cache. * * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) * * - mm - mm_struct describing address space */ -ENTRY(v7_flush_dcache_all) +.macro v7_op_dcache_all op @ op=c10 clean, op=c14 flush dmb @ ensure ordering with previous memory accesses mrc p15, 1, r0, c0, c0, 1 @ read clidr ands r3, r0, #0x7000000 @ extract loc from clidr mov r3, r3, lsr #23 @ left align loc bit field - beq finished @ if loc is 0, then no need to clean + beq 1005f @ if loc is 0, then no need to clean mov r10, #0 @ start clean at cache level 0 -loop1: +1001: add r2, r10, r10, lsr #1 @ work out 3x current cache level mov r1, r0, lsr r2 @ extract cache type bits from clidr and r1, r1, #7 @ mask of the bits for current cache only cmp r1, #2 @ see what cache we have at this level - blt skip @ skip if no cache, or just i-cache + blt 1004f @ skip if no cache, or just i-cache mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr isb @ isb to sych the new cssr&csidr mrc p15, 1, r1, c0, c0, 0 @ read the new csidr @@ -49,32 +50,40 @@ loop1: clz r5, r4 @ find bit position of way size increment ldr r7, =0x7fff ands r7, r7, r1, lsr #13 @ extract max number of the index size -loop2: +1002: mov r9, r4 @ create working copy of max way size -loop3: +1003: ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11 THUMB( lsl r6, r9, r5 ) THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11 ARM( orr r11, r11, r7, lsl r2 ) @ factor index number into r11 THUMB( lsl r6, r7, r2 ) THUMB( orr r11, r11, r6 ) @ factor index number into r11 - mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way + mcr p15, 0, r11, c7, 
\op, 2 @ op=c10/c14, clean/flush by set/way subs r9, r9, #1 @ decrement the way - bge loop3 + bge 1003b subs r7, r7, #1 @ decrement the index - bge loop2 -skip: + bge 1002b +1004: add r10, r10, #2 @ increment cache number cmp r3, r10 - bgt loop1 -finished: + bgt 1001b +1005: mov r10, #0 @ swith back to cache level 0 mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr dsb isb mov pc, lr +.endm + +ENTRY(v7_flush_dcache_all) + v7_op_dcache_all c14 ENDPROC(v7_flush_dcache_all) +ENTRY(v7_clean_dcache_all) + v7_op_dcache_all c10 +ENDPROC(v7_clean_dcache_all) + /* * v7_flush_cache_all() * @@ -102,6 +111,24 @@ ENTRY(v7_flush_kern_cache_all) ENDPROC(v7_flush_kern_cache_all) /* + * v7_clean_kern_cache_all() + */ +ENTRY(v7_clean_kern_cache_all) + ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) + bl v7_clean_dcache_all + mov r0, #0 +#ifdef CONFIG_SMP + mcr p15, 0, r0, c7, c1, 0 @ invalidate I-cache inner shareable +#else + mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate +#endif + ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) + mov pc, lr +ENDPROC(v7_clean_kern_cache_all) + +/* * v7_flush_cache_all() * * Flush all TLB entries in a particular address space diff --git a/drivers/video/tegra/nvmap/nvmap_common.h b/drivers/video/tegra/nvmap/nvmap_common.h new file mode 100644 index 000000000000..20d27fa955bb --- /dev/null +++ b/drivers/video/tegra/nvmap/nvmap_common.h @@ -0,0 +1,36 @@ +/* + * drivers/video/tegra/nvmap/nvmap_common.h + * + * GPU memory management driver for Tegra + * + * Copyright (c) 2011, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +extern void v7_flush_kern_cache_all(void *); +extern void v7_clean_kern_cache_all(void *); + +#define FLUSH_CLEAN_BY_SET_WAY_THRESHOLD (8 * PAGE_SIZE) + +static inline void inner_flush_cache_all(void) +{ + on_each_cpu(v7_flush_kern_cache_all, NULL, 1); +} + +static inline void inner_clean_cache_all(void) +{ + on_each_cpu(v7_clean_kern_cache_all, NULL, 1); +} diff --git a/drivers/video/tegra/nvmap/nvmap_dev.c b/drivers/video/tegra/nvmap/nvmap_dev.c index ed97228d0d63..5d63dbc4fbc9 100644 --- a/drivers/video/tegra/nvmap/nvmap_dev.c +++ b/drivers/video/tegra/nvmap/nvmap_dev.c @@ -3,7 +3,7 @@ * * User-space interface to nvmap * - * Copyright (c) 2010, NVIDIA Corporation. + * Copyright (c) 2011, NVIDIA Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -44,6 +44,7 @@ #include "nvmap.h" #include "nvmap_ioctl.h" #include "nvmap_mru.h" +#include "nvmap_common.h" #define NVMAP_NUM_PTES 64 #define NVMAP_CARVEOUT_KILLER_RETRY_TIME 100 /* msecs */ @@ -250,8 +251,30 @@ unsigned long nvmap_carveout_usage(struct nvmap_client *c, return 0; } -static int nvmap_flush_heap_block(struct nvmap_client *client, - struct nvmap_heap_block *block, size_t len) +/* + * This routine is used to flush the carveout memory from cache. + * Why is a cache flush needed for carveout? Consider the case where a piece of + * carveout is allocated as cached and released.
After this, suppose the same memory is + * allocated for an uncached request without being flushed out from cache. + * In this case, the client might pass it to a H/W engine, which could start modifying + * the memory. As this was cached earlier, it might have some portion of it in cache. + * During cpu request to read/write other memory, the cached portion of this memory + * might get flushed back to main memory and would cause corruptions, if it happens + * after H/W writes data to memory. + * + * But flushing out the memory blindly on each carveout allocation is redundant. + * + * In order to optimize the carveout buffer cache flushes, the following + * strategy is used. + * + * The whole Carveout is flushed out from cache during its initialization. + * During allocation, carveout buffers are not flushed from cache. + * During deallocation, carveout buffers are flushed, if they were allocated as cached. + * If they were allocated as uncached/writecombined, no cache flush is needed. + * Just draining store buffers is enough. + */ +int nvmap_flush_heap_block(struct nvmap_client *client, + struct nvmap_heap_block *block, size_t len, unsigned int prot) { pte_t **pte; void *addr; @@ -259,7 +282,17 @@ static int nvmap_flush_heap_block(struct nvmap_client *client, unsigned long phys = block->base; unsigned long end = block->base + len; - pte = nvmap_alloc_pte(client->dev, &addr); + if (prot == NVMAP_HANDLE_UNCACHEABLE || prot == NVMAP_HANDLE_WRITE_COMBINE) + goto out; + + if ( len >= FLUSH_CLEAN_BY_SET_WAY_THRESHOLD ) { + inner_flush_cache_all(); + if (prot != NVMAP_HANDLE_INNER_CACHEABLE) + outer_flush_range(block->base, block->base + len); + goto out; + } + + pte = nvmap_alloc_pte((client ?
client->dev : nvmap_dev), &addr); if (IS_ERR(pte)) return PTR_ERR(pte); @@ -277,9 +310,12 @@ static int nvmap_flush_heap_block(struct nvmap_client *client, phys = next; } - outer_flush_range(block->base, block->base + len); + if (prot != NVMAP_HANDLE_INNER_CACHEABLE) + outer_flush_range(block->base, block->base + len); - nvmap_free_pte(client->dev, pte); + nvmap_free_pte((client ? client->dev: nvmap_dev), pte); +out: + wmb(); return 0; } @@ -421,13 +457,6 @@ struct nvmap_heap_block *do_nvmap_carveout_alloc(struct nvmap_client *client, block = nvmap_heap_alloc(co_heap->carveout, len, align, prot, handle); if (block) { - /* flush any stale data that may be left in the - * cache at the block's address, since the new - * block may be mapped uncached */ - if (nvmap_flush_heap_block(client, block, len)) { - nvmap_heap_free(block); - block = NULL; - } return block; } } diff --git a/drivers/video/tegra/nvmap/nvmap_handle.c b/drivers/video/tegra/nvmap/nvmap_handle.c index dc3be30ca2f5..a9150a36cf2a 100644 --- a/drivers/video/tegra/nvmap/nvmap_handle.c +++ b/drivers/video/tegra/nvmap/nvmap_handle.c @@ -37,6 +37,7 @@ #include "nvmap.h" #include "nvmap_mru.h" +#include "nvmap_common.h" #define NVMAP_SECURE_HEAPS (NVMAP_HEAP_CARVEOUT_IRAM | NVMAP_HEAP_IOVMM) #ifdef CONFIG_NVMAP_HIGHMEM_ONLY @@ -107,7 +108,8 @@ out: extern void __flush_dcache_page(struct address_space *, struct page *); -static struct page *nvmap_alloc_pages_exact(gfp_t gfp, size_t size) +static struct page *nvmap_alloc_pages_exact(gfp_t gfp, + size_t size, bool flush_inner) { struct page *page, *p, *e; unsigned int order; @@ -127,8 +129,10 @@ static struct page *nvmap_alloc_pages_exact(gfp_t gfp, size_t size) __free_page(p); e = page + (size >> PAGE_SHIFT); - for (p = page; p < e; p++) - __flush_dcache_page(page_mapping(p), p); + if (flush_inner) { + for (p = page; p < e; p++) + __flush_dcache_page(page_mapping(p), p); + } base = page_to_phys(page); outer_flush_range(base, base + size); @@ -143,6 +147,7 @@ 
static int handle_page_alloc(struct nvmap_client *client, pgprot_t prot; unsigned int i = 0; struct page **pages; + bool flush_inner = true; pages = altalloc(nr_page * sizeof(*pages)); if (!pages) @@ -155,10 +160,14 @@ static int handle_page_alloc(struct nvmap_client *client, contiguous = true; #endif + if (size >= FLUSH_CLEAN_BY_SET_WAY_THRESHOLD) { + inner_flush_cache_all(); + flush_inner = false; + } h->pgalloc.area = NULL; if (contiguous) { struct page *page; - page = nvmap_alloc_pages_exact(GFP_NVMAP, size); + page = nvmap_alloc_pages_exact(GFP_NVMAP, size, flush_inner); if (!page) goto fail; @@ -167,7 +176,8 @@ static int handle_page_alloc(struct nvmap_client *client, } else { for (i = 0; i < nr_page; i++) { - pages[i] = nvmap_alloc_pages_exact(GFP_NVMAP, PAGE_SIZE); + pages[i] = nvmap_alloc_pages_exact(GFP_NVMAP, PAGE_SIZE, + flush_inner); if (!pages[i]) goto fail; } @@ -193,6 +203,7 @@ fail: while (i--) __free_page(pages[i]); altfree(pages, nr_page * sizeof(*pages)); + wmb(); return -ENOMEM; } diff --git a/drivers/video/tegra/nvmap/nvmap_heap.c b/drivers/video/tegra/nvmap/nvmap_heap.c index c920048db82b..a0a574d78944 100644 --- a/drivers/video/tegra/nvmap/nvmap_heap.c +++ b/drivers/video/tegra/nvmap/nvmap_heap.c @@ -3,7 +3,7 @@ * * GPU heap allocator. * - * Copyright (c) 2010, NVIDIA Corporation. + * Copyright (c) 2011, NVIDIA Corporation. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -31,6 +31,7 @@ #include <mach/nvmap.h> #include "nvmap.h" #include "nvmap_heap.h" +#include "nvmap_common.h" #include <asm/tlbflush.h> #include <asm/cacheflush.h> @@ -887,6 +888,9 @@ struct nvmap_heap_block *nvmap_heap_alloc(struct nvmap_heap *h, size_t len, struct nvmap_heap *nvmap_block_to_heap(struct nvmap_heap_block *b) { + struct buddy_heap *bh = NULL; + struct nvmap_heap *h; + if (b->type == BLOCK_BUDDY) { struct buddy_block *bb; bb = container_of(b, struct buddy_block, block); @@ -898,17 +902,24 @@ struct nvmap_heap *nvmap_block_to_heap(struct nvmap_heap_block *b) } } +int nvmap_flush_heap_block(struct nvmap_client *client, + struct nvmap_heap_block *block, size_t len, unsigned int prot); + /* nvmap_heap_free: frees block b*/ void nvmap_heap_free(struct nvmap_heap_block *b) { struct buddy_heap *bh = NULL; struct nvmap_heap *h = nvmap_block_to_heap(b); + struct list_block *lb; mutex_lock(&h->lock); if (b->type == BLOCK_BUDDY) bh = do_buddy_free(b); - else + else { + lb = container_of(b, struct list_block, block); + nvmap_flush_heap_block(NULL, b, lb->size, lb->mem_prot); do_heap_free(b); + } if (bh) { list_del(&bh->buddy_list); @@ -1008,6 +1019,10 @@ struct nvmap_heap *nvmap_heap_create(struct device *parent, const char *name, l->orig_addr = base; list_add_tail(&l->free_list, &h->free_list); list_add_tail(&l->all_list, &h->all_list); + + inner_flush_cache_all(); + outer_flush_range(base, base + len); + wmb(); return h; fail_register: diff --git a/drivers/video/tegra/nvmap/nvmap_ioctl.c b/drivers/video/tegra/nvmap/nvmap_ioctl.c index fb8c5ff00bdd..fc367c89ad45 100644 --- a/drivers/video/tegra/nvmap/nvmap_ioctl.c +++ b/drivers/video/tegra/nvmap/nvmap_ioctl.c @@ -3,7 +3,7 @@ * * User-space interface to nvmap * - * Copyright (c) 2010, NVIDIA Corporation. + * Copyright (c) 2011, NVIDIA Corporation. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -35,6 +35,7 @@ #include "nvmap_ioctl.h" #include "nvmap.h" +#include "nvmap_common.h" static ssize_t rw_handle(struct nvmap_client *client, struct nvmap_handle *h, int is_read, unsigned long h_offs, @@ -477,10 +478,91 @@ int nvmap_ioctl_free(struct file *filp, unsigned long arg) return 0; } +static void inner_cache_maint(unsigned int op, void *vaddr, size_t size) +{ + if (op == NVMAP_CACHE_OP_WB_INV) + dmac_flush_range(vaddr, vaddr + size); + else if (op == NVMAP_CACHE_OP_INV) + dmac_map_area(vaddr, size, DMA_FROM_DEVICE); + else + dmac_map_area(vaddr, size, DMA_TO_DEVICE); +} + +static void outer_cache_maint(unsigned int op, unsigned long paddr, size_t size) +{ + if (op == NVMAP_CACHE_OP_WB_INV) + outer_flush_range(paddr, paddr + size); + else if (op == NVMAP_CACHE_OP_INV) + outer_inv_range(paddr, paddr + size); + else + outer_clean_range(paddr, paddr + size); +} + +static void heap_page_cache_maint(struct nvmap_client *client, + struct nvmap_handle *h, unsigned long start, unsigned long end, + unsigned int op, bool inner, bool outer, pte_t **pte, + unsigned long kaddr, pgprot_t prot) +{ + struct page *page; + unsigned long paddr; + unsigned long next; + unsigned long off; + size_t size; + + while (start < end) { + page = h->pgalloc.pages[start >> PAGE_SHIFT]; + next = min(((start + PAGE_SIZE) & PAGE_MASK), end); + off = start & ~PAGE_MASK; + size = next - start; + paddr = page_to_phys(page) + off; + + if (inner) { + void *vaddr = (void *)kaddr + off; + BUG_ON(!pte); + BUG_ON(!kaddr); + set_pte_at(&init_mm, kaddr, *pte, + pfn_pte(__phys_to_pfn(paddr), prot)); + flush_tlb_kernel_page(kaddr); + inner_cache_maint(op, vaddr, size); + } + + if (outer) + outer_cache_maint(op, paddr, size); + start = next; + } +} + +static bool fast_cache_maint(struct nvmap_client *client, struct nvmap_handle *h, + unsigned long start, 
unsigned long end, unsigned int op) +{ + int ret = false; + + if ( (op == NVMAP_CACHE_OP_INV) || + ((end - start) < FLUSH_CLEAN_BY_SET_WAY_THRESHOLD) ) + goto out; + + if (op == NVMAP_CACHE_OP_WB_INV) { + inner_flush_cache_all(); + } else if (op == NVMAP_CACHE_OP_WB) { + inner_clean_cache_all(); + } + + if (h->heap_pgalloc && (h->flags != NVMAP_HANDLE_INNER_CACHEABLE)) { + heap_page_cache_maint(client, h, start, end, op, + false, true, NULL, 0, 0); + } else if (h->flags != NVMAP_HANDLE_INNER_CACHEABLE) { + start += h->carveout->base; + end += h->carveout->base; + outer_cache_maint(op, start, end - start); + } + ret = true; +out: + return ret; +} + static int cache_maint(struct nvmap_client *client, struct nvmap_handle *h, unsigned long start, unsigned long end, unsigned int op) { - enum dma_data_direction dir; pgprot_t prot; pte_t **pte = NULL; unsigned long kaddr; @@ -501,26 +583,8 @@ static int cache_maint(struct nvmap_client *client, struct nvmap_handle *h, start == end) goto out; - if (WARN_ON_ONCE(op == NVMAP_CACHE_OP_WB_INV)) - dir = DMA_BIDIRECTIONAL; - else if (op == NVMAP_CACHE_OP_WB) - dir = DMA_TO_DEVICE; - else - dir = DMA_FROM_DEVICE; - - if (h->heap_pgalloc) { - while (start < end) { - unsigned long next = (start + PAGE_SIZE) & PAGE_MASK; - struct page *page; - - page = h->pgalloc.pages[start >> PAGE_SHIFT]; - next = min(next, end); - __dma_page_cpu_to_dev(page, start & ~PAGE_MASK, - next - start, dir); - start = next; - } + if (fast_cache_maint(client, h, start, end, op)) goto out; - } prot = nvmap_pgprot(h, pgprot_kernel); pte = nvmap_alloc_pte(client->dev, (void **)&kaddr); @@ -530,6 +594,13 @@ static int cache_maint(struct nvmap_client *client, struct nvmap_handle *h, goto out; } + if (h->heap_pgalloc) { + heap_page_cache_maint(client, h, start, end, op, true, + (h->flags == NVMAP_HANDLE_INNER_CACHEABLE) ? 
false : true, + pte, kaddr, prot); + goto out; + } + if (start > h->size || end > h->size) { nvmap_warn(client, "cache maintenance outside handle\n"); return -EINVAL; @@ -552,16 +623,13 @@ static int cache_maint(struct nvmap_client *client, struct nvmap_handle *h, pfn_pte(__phys_to_pfn(loop), prot)); flush_tlb_kernel_page(kaddr); - dmac_map_area(base, next - loop, dir); + inner_cache_maint(op, base, next - loop); loop = next; } - if (h->flags != NVMAP_HANDLE_INNER_CACHEABLE) { - if (dir != DMA_FROM_DEVICE) - outer_clean_range(start, end); - else - outer_inv_range(start, end); - } + if (h->flags != NVMAP_HANDLE_INNER_CACHEABLE) + outer_cache_maint(op, start, end - start); + /* unlock carveout */ nvmap_usecount_dec(h); |