summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/arm/mm/cache-v7.S55
-rw-r--r--drivers/video/tegra/nvmap/nvmap_common.h36
-rw-r--r--drivers/video/tegra/nvmap/nvmap_dev.c55
-rw-r--r--drivers/video/tegra/nvmap/nvmap_handle.c21
-rw-r--r--drivers/video/tegra/nvmap/nvmap_heap.c19
-rw-r--r--drivers/video/tegra/nvmap/nvmap_ioctl.c124
6 files changed, 248 insertions, 62 deletions
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 37c8157e116e..c8c823953a34 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -18,27 +18,28 @@
#include "proc-macros.S"
/*
- * v7_flush_dcache_all()
+ * v7_op_dcache_all op
*
- * Flush the whole D-cache.
+ * op=c14, Flush the whole D-cache.
+ * op=c10, Clean the whole D-cache.
*
* Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
*
* - mm - mm_struct describing address space
*/
-ENTRY(v7_flush_dcache_all)
+.macro v7_op_dcache_all op @ op=c10 clean, op=c14 flush
dmb @ ensure ordering with previous memory accesses
mrc p15, 1, r0, c0, c0, 1 @ read clidr
ands r3, r0, #0x7000000 @ extract loc from clidr
mov r3, r3, lsr #23 @ left align loc bit field
- beq finished @ if loc is 0, then no need to clean
+ beq 1005f @ if loc is 0, then no need to clean
mov r10, #0 @ start clean at cache level 0
-loop1:
+1001:
add r2, r10, r10, lsr #1 @ work out 3x current cache level
mov r1, r0, lsr r2 @ extract cache type bits from clidr
and r1, r1, #7 @ mask of the bits for current cache only
cmp r1, #2 @ see what cache we have at this level
- blt skip @ skip if no cache, or just i-cache
+ blt 1004f @ skip if no cache, or just i-cache
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
isb @ isb to sych the new cssr&csidr
mrc p15, 1, r1, c0, c0, 0 @ read the new csidr
@@ -49,32 +50,40 @@ loop1:
clz r5, r4 @ find bit position of way size increment
ldr r7, =0x7fff
ands r7, r7, r1, lsr #13 @ extract max number of the index size
-loop2:
+1002:
mov r9, r4 @ create working copy of max way size
-loop3:
+1003:
ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11
THUMB( lsl r6, r9, r5 )
THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11
ARM( orr r11, r11, r7, lsl r2 ) @ factor index number into r11
THUMB( lsl r6, r7, r2 )
THUMB( orr r11, r11, r6 ) @ factor index number into r11
- mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way
+ mcr p15, 0, r11, c7, \op, 2 @ op=c10/c14, clean/flush by set/way
subs r9, r9, #1 @ decrement the way
- bge loop3
+ bge 1003b
subs r7, r7, #1 @ decrement the index
- bge loop2
-skip:
+ bge 1002b
+1004:
add r10, r10, #2 @ increment cache number
cmp r3, r10
- bgt loop1
-finished:
+ bgt 1001b
+1005:
mov r10, #0 @ swith back to cache level 0
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
dsb
isb
mov pc, lr
+.endm
+
+ENTRY(v7_flush_dcache_all)
+ v7_op_dcache_all c14
ENDPROC(v7_flush_dcache_all)
+ENTRY(v7_clean_dcache_all)
+ v7_op_dcache_all c10
+ENDPROC(v7_clean_dcache_all)
+
/*
* v7_flush_cache_all()
*
@@ -102,6 +111,24 @@ ENTRY(v7_flush_kern_cache_all)
ENDPROC(v7_flush_kern_cache_all)
/*
+ * v7_clean_kern_cache_all()
+ */
+ENTRY(v7_clean_kern_cache_all)
+ ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} )
+ THUMB( stmfd sp!, {r4-r7, r9-r11, lr} )
+ bl v7_clean_dcache_all
+ mov r0, #0
+#ifdef CONFIG_SMP
+ mcr p15, 0, r0, c7, c1, 0 @ invalidate I-cache inner shareable
+#else
+ mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate
+#endif
+ ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} )
+ THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} )
+ mov pc, lr
+ENDPROC(v7_clean_kern_cache_all)
+
+/*
* v7_flush_cache_all()
*
* Flush all TLB entries in a particular address space
diff --git a/drivers/video/tegra/nvmap/nvmap_common.h b/drivers/video/tegra/nvmap/nvmap_common.h
new file mode 100644
index 000000000000..20d27fa955bb
--- /dev/null
+++ b/drivers/video/tegra/nvmap/nvmap_common.h
@@ -0,0 +1,36 @@
+/*
+ * drivers/video/tegra/nvmap/nvmap_common.h
+ *
+ * GPU memory management driver for Tegra
+ *
+ * Copyright (c) 2011, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+extern void v7_flush_kern_cache_all(void *);
+extern void v7_clean_kern_cache_all(void *);
+
+#define FLUSH_CLEAN_BY_SET_WAY_THRESHOLD (8 * PAGE_SIZE)
+
+static inline void inner_flush_cache_all(void)
+{
+ on_each_cpu(v7_flush_kern_cache_all, NULL, 1);
+}
+
+static inline void inner_clean_cache_all(void)
+{
+ on_each_cpu(v7_clean_kern_cache_all, NULL, 1);
+}
diff --git a/drivers/video/tegra/nvmap/nvmap_dev.c b/drivers/video/tegra/nvmap/nvmap_dev.c
index ed97228d0d63..5d63dbc4fbc9 100644
--- a/drivers/video/tegra/nvmap/nvmap_dev.c
+++ b/drivers/video/tegra/nvmap/nvmap_dev.c
@@ -3,7 +3,7 @@
*
* User-space interface to nvmap
*
- * Copyright (c) 2010, NVIDIA Corporation.
+ * Copyright (c) 2011, NVIDIA Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -44,6 +44,7 @@
#include "nvmap.h"
#include "nvmap_ioctl.h"
#include "nvmap_mru.h"
+#include "nvmap_common.h"
#define NVMAP_NUM_PTES 64
#define NVMAP_CARVEOUT_KILLER_RETRY_TIME 100 /* msecs */
@@ -250,8 +251,30 @@ unsigned long nvmap_carveout_usage(struct nvmap_client *c,
return 0;
}
-static int nvmap_flush_heap_block(struct nvmap_client *client,
- struct nvmap_heap_block *block, size_t len)
+/*
+ * This routine is used to flush the carveout memory from cache.
+ * Why cache flush is needed for carveout? Consider the case, where a piece of
+ * carveout is allocated as cached and released. After this, if the same memory is
+ * allocated for an uncached request and the memory is not flushed out from cache.
+ * In this case, the client might pass this to a H/W engine and it could start modifying
+ * the memory. As this was cached earlier, it might have some portion of it in cache.
+ * During cpu request to read/write other memory, the cached portion of this memory
+ * might get flushed back to main memory and would cause corruptions, if it happens
+ * after H/W writes data to memory.
+ *
+ * But flushing out the memory blindly on each carveout allocation is redundant.
+ *
+ * In order to optimize the carveout buffer cache flushes, the following
+ * strategy is used.
+ *
+ * The whole Carveout is flushed out from cache during its initialization.
+ * During allocation, carveout buffers are not flushed from cache.
+ * During deallocation, carveout buffers are flushed, if they were allocated as cached.
+ * If they were allocated as uncached/writecombined, no cache flush is needed.
+ * Just draining store buffers is enough.
+ */
+int nvmap_flush_heap_block(struct nvmap_client *client,
+ struct nvmap_heap_block *block, size_t len, unsigned int prot)
{
pte_t **pte;
void *addr;
@@ -259,7 +282,17 @@ static int nvmap_flush_heap_block(struct nvmap_client *client,
unsigned long phys = block->base;
unsigned long end = block->base + len;
- pte = nvmap_alloc_pte(client->dev, &addr);
+ if (prot == NVMAP_HANDLE_UNCACHEABLE || prot == NVMAP_HANDLE_WRITE_COMBINE)
+ goto out;
+
+ if ( len >= FLUSH_CLEAN_BY_SET_WAY_THRESHOLD ) {
+ inner_flush_cache_all();
+ if (prot != NVMAP_HANDLE_INNER_CACHEABLE)
+ outer_flush_range(block->base, block->base + len);
+ goto out;
+ }
+
+ pte = nvmap_alloc_pte((client ? client->dev : nvmap_dev), &addr);
if (IS_ERR(pte))
return PTR_ERR(pte);
@@ -277,9 +310,12 @@ static int nvmap_flush_heap_block(struct nvmap_client *client,
phys = next;
}
- outer_flush_range(block->base, block->base + len);
+ if (prot != NVMAP_HANDLE_INNER_CACHEABLE)
+ outer_flush_range(block->base, block->base + len);
- nvmap_free_pte(client->dev, pte);
+ nvmap_free_pte((client ? client->dev: nvmap_dev), pte);
+out:
+ wmb();
return 0;
}
@@ -421,13 +457,6 @@ struct nvmap_heap_block *do_nvmap_carveout_alloc(struct nvmap_client *client,
block = nvmap_heap_alloc(co_heap->carveout, len,
align, prot, handle);
if (block) {
- /* flush any stale data that may be left in the
- * cache at the block's address, since the new
- * block may be mapped uncached */
- if (nvmap_flush_heap_block(client, block, len)) {
- nvmap_heap_free(block);
- block = NULL;
- }
return block;
}
}
diff --git a/drivers/video/tegra/nvmap/nvmap_handle.c b/drivers/video/tegra/nvmap/nvmap_handle.c
index dc3be30ca2f5..a9150a36cf2a 100644
--- a/drivers/video/tegra/nvmap/nvmap_handle.c
+++ b/drivers/video/tegra/nvmap/nvmap_handle.c
@@ -37,6 +37,7 @@
#include "nvmap.h"
#include "nvmap_mru.h"
+#include "nvmap_common.h"
#define NVMAP_SECURE_HEAPS (NVMAP_HEAP_CARVEOUT_IRAM | NVMAP_HEAP_IOVMM)
#ifdef CONFIG_NVMAP_HIGHMEM_ONLY
@@ -107,7 +108,8 @@ out:
extern void __flush_dcache_page(struct address_space *, struct page *);
-static struct page *nvmap_alloc_pages_exact(gfp_t gfp, size_t size)
+static struct page *nvmap_alloc_pages_exact(gfp_t gfp,
+ size_t size, bool flush_inner)
{
struct page *page, *p, *e;
unsigned int order;
@@ -127,8 +129,10 @@ static struct page *nvmap_alloc_pages_exact(gfp_t gfp, size_t size)
__free_page(p);
e = page + (size >> PAGE_SHIFT);
- for (p = page; p < e; p++)
- __flush_dcache_page(page_mapping(p), p);
+ if (flush_inner) {
+ for (p = page; p < e; p++)
+ __flush_dcache_page(page_mapping(p), p);
+ }
base = page_to_phys(page);
outer_flush_range(base, base + size);
@@ -143,6 +147,7 @@ static int handle_page_alloc(struct nvmap_client *client,
pgprot_t prot;
unsigned int i = 0;
struct page **pages;
+ bool flush_inner = true;
pages = altalloc(nr_page * sizeof(*pages));
if (!pages)
@@ -155,10 +160,14 @@ static int handle_page_alloc(struct nvmap_client *client,
contiguous = true;
#endif
+ if (size >= FLUSH_CLEAN_BY_SET_WAY_THRESHOLD) {
+ inner_flush_cache_all();
+ flush_inner = false;
+ }
h->pgalloc.area = NULL;
if (contiguous) {
struct page *page;
- page = nvmap_alloc_pages_exact(GFP_NVMAP, size);
+ page = nvmap_alloc_pages_exact(GFP_NVMAP, size, flush_inner);
if (!page)
goto fail;
@@ -167,7 +176,8 @@ static int handle_page_alloc(struct nvmap_client *client,
} else {
for (i = 0; i < nr_page; i++) {
- pages[i] = nvmap_alloc_pages_exact(GFP_NVMAP, PAGE_SIZE);
+ pages[i] = nvmap_alloc_pages_exact(GFP_NVMAP, PAGE_SIZE,
+ flush_inner);
if (!pages[i])
goto fail;
}
@@ -193,6 +203,7 @@ fail:
while (i--)
__free_page(pages[i]);
altfree(pages, nr_page * sizeof(*pages));
+ wmb();
return -ENOMEM;
}
diff --git a/drivers/video/tegra/nvmap/nvmap_heap.c b/drivers/video/tegra/nvmap/nvmap_heap.c
index c920048db82b..a0a574d78944 100644
--- a/drivers/video/tegra/nvmap/nvmap_heap.c
+++ b/drivers/video/tegra/nvmap/nvmap_heap.c
@@ -3,7 +3,7 @@
*
* GPU heap allocator.
*
- * Copyright (c) 2010, NVIDIA Corporation.
+ * Copyright (c) 2011, NVIDIA Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -31,6 +31,7 @@
#include <mach/nvmap.h>
#include "nvmap.h"
#include "nvmap_heap.h"
+#include "nvmap_common.h"
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
@@ -887,6 +888,9 @@ struct nvmap_heap_block *nvmap_heap_alloc(struct nvmap_heap *h, size_t len,
struct nvmap_heap *nvmap_block_to_heap(struct nvmap_heap_block *b)
{
+ struct buddy_heap *bh = NULL;
+ struct nvmap_heap *h;
+
if (b->type == BLOCK_BUDDY) {
struct buddy_block *bb;
bb = container_of(b, struct buddy_block, block);
@@ -898,17 +902,24 @@ struct nvmap_heap *nvmap_block_to_heap(struct nvmap_heap_block *b)
}
}
+int nvmap_flush_heap_block(struct nvmap_client *client,
+ struct nvmap_heap_block *block, size_t len, unsigned int prot);
+
/* nvmap_heap_free: frees block b*/
void nvmap_heap_free(struct nvmap_heap_block *b)
{
struct buddy_heap *bh = NULL;
struct nvmap_heap *h = nvmap_block_to_heap(b);
+ struct list_block *lb;
mutex_lock(&h->lock);
if (b->type == BLOCK_BUDDY)
bh = do_buddy_free(b);
- else
+ else {
+ lb = container_of(b, struct list_block, block);
+ nvmap_flush_heap_block(NULL, b, lb->size, lb->mem_prot);
do_heap_free(b);
+ }
if (bh) {
list_del(&bh->buddy_list);
@@ -1008,6 +1019,10 @@ struct nvmap_heap *nvmap_heap_create(struct device *parent, const char *name,
l->orig_addr = base;
list_add_tail(&l->free_list, &h->free_list);
list_add_tail(&l->all_list, &h->all_list);
+
+ inner_flush_cache_all();
+ outer_flush_range(base, base + len);
+ wmb();
return h;
fail_register:
diff --git a/drivers/video/tegra/nvmap/nvmap_ioctl.c b/drivers/video/tegra/nvmap/nvmap_ioctl.c
index fb8c5ff00bdd..fc367c89ad45 100644
--- a/drivers/video/tegra/nvmap/nvmap_ioctl.c
+++ b/drivers/video/tegra/nvmap/nvmap_ioctl.c
@@ -3,7 +3,7 @@
*
* User-space interface to nvmap
*
- * Copyright (c) 2010, NVIDIA Corporation.
+ * Copyright (c) 2011, NVIDIA Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -35,6 +35,7 @@
#include "nvmap_ioctl.h"
#include "nvmap.h"
+#include "nvmap_common.h"
static ssize_t rw_handle(struct nvmap_client *client, struct nvmap_handle *h,
int is_read, unsigned long h_offs,
@@ -477,10 +478,91 @@ int nvmap_ioctl_free(struct file *filp, unsigned long arg)
return 0;
}
+static void inner_cache_maint(unsigned int op, void *vaddr, size_t size)
+{
+ if (op == NVMAP_CACHE_OP_WB_INV)
+ dmac_flush_range(vaddr, vaddr + size);
+ else if (op == NVMAP_CACHE_OP_INV)
+ dmac_map_area(vaddr, size, DMA_FROM_DEVICE);
+ else
+ dmac_map_area(vaddr, size, DMA_TO_DEVICE);
+}
+
+static void outer_cache_maint(unsigned int op, unsigned long paddr, size_t size)
+{
+ if (op == NVMAP_CACHE_OP_WB_INV)
+ outer_flush_range(paddr, paddr + size);
+ else if (op == NVMAP_CACHE_OP_INV)
+ outer_inv_range(paddr, paddr + size);
+ else
+ outer_clean_range(paddr, paddr + size);
+}
+
+static void heap_page_cache_maint(struct nvmap_client *client,
+ struct nvmap_handle *h, unsigned long start, unsigned long end,
+ unsigned int op, bool inner, bool outer, pte_t **pte,
+ unsigned long kaddr, pgprot_t prot)
+{
+ struct page *page;
+ unsigned long paddr;
+ unsigned long next;
+ unsigned long off;
+ size_t size;
+
+ while (start < end) {
+ page = h->pgalloc.pages[start >> PAGE_SHIFT];
+ next = min(((start + PAGE_SIZE) & PAGE_MASK), end);
+ off = start & ~PAGE_MASK;
+ size = next - start;
+ paddr = page_to_phys(page) + off;
+
+ if (inner) {
+ void *vaddr = (void *)kaddr + off;
+ BUG_ON(!pte);
+ BUG_ON(!kaddr);
+ set_pte_at(&init_mm, kaddr, *pte,
+ pfn_pte(__phys_to_pfn(paddr), prot));
+ flush_tlb_kernel_page(kaddr);
+ inner_cache_maint(op, vaddr, size);
+ }
+
+ if (outer)
+ outer_cache_maint(op, paddr, size);
+ start = next;
+ }
+}
+
+static bool fast_cache_maint(struct nvmap_client *client, struct nvmap_handle *h,
+ unsigned long start, unsigned long end, unsigned int op)
+{
+ int ret = false;
+
+ if ( (op == NVMAP_CACHE_OP_INV) ||
+ ((end - start) < FLUSH_CLEAN_BY_SET_WAY_THRESHOLD) )
+ goto out;
+
+ if (op == NVMAP_CACHE_OP_WB_INV) {
+ inner_flush_cache_all();
+ } else if (op == NVMAP_CACHE_OP_WB) {
+ inner_clean_cache_all();
+ }
+
+ if (h->heap_pgalloc && (h->flags != NVMAP_HANDLE_INNER_CACHEABLE)) {
+ heap_page_cache_maint(client, h, start, end, op,
+ false, true, NULL, 0, 0);
+ } else if (h->flags != NVMAP_HANDLE_INNER_CACHEABLE) {
+ start += h->carveout->base;
+ end += h->carveout->base;
+ outer_cache_maint(op, start, end - start);
+ }
+ ret = true;
+out:
+ return ret;
+}
+
static int cache_maint(struct nvmap_client *client, struct nvmap_handle *h,
unsigned long start, unsigned long end, unsigned int op)
{
- enum dma_data_direction dir;
pgprot_t prot;
pte_t **pte = NULL;
unsigned long kaddr;
@@ -501,26 +583,8 @@ static int cache_maint(struct nvmap_client *client, struct nvmap_handle *h,
start == end)
goto out;
- if (WARN_ON_ONCE(op == NVMAP_CACHE_OP_WB_INV))
- dir = DMA_BIDIRECTIONAL;
- else if (op == NVMAP_CACHE_OP_WB)
- dir = DMA_TO_DEVICE;
- else
- dir = DMA_FROM_DEVICE;
-
- if (h->heap_pgalloc) {
- while (start < end) {
- unsigned long next = (start + PAGE_SIZE) & PAGE_MASK;
- struct page *page;
-
- page = h->pgalloc.pages[start >> PAGE_SHIFT];
- next = min(next, end);
- __dma_page_cpu_to_dev(page, start & ~PAGE_MASK,
- next - start, dir);
- start = next;
- }
+ if (fast_cache_maint(client, h, start, end, op))
goto out;
- }
prot = nvmap_pgprot(h, pgprot_kernel);
pte = nvmap_alloc_pte(client->dev, (void **)&kaddr);
@@ -530,6 +594,13 @@ static int cache_maint(struct nvmap_client *client, struct nvmap_handle *h,
goto out;
}
+ if (h->heap_pgalloc) {
+ heap_page_cache_maint(client, h, start, end, op, true,
+ (h->flags == NVMAP_HANDLE_INNER_CACHEABLE) ? false : true,
+ pte, kaddr, prot);
+ goto out;
+ }
+
if (start > h->size || end > h->size) {
nvmap_warn(client, "cache maintenance outside handle\n");
return -EINVAL;
@@ -552,16 +623,13 @@ static int cache_maint(struct nvmap_client *client, struct nvmap_handle *h,
pfn_pte(__phys_to_pfn(loop), prot));
flush_tlb_kernel_page(kaddr);
- dmac_map_area(base, next - loop, dir);
+ inner_cache_maint(op, base, next - loop);
loop = next;
}
- if (h->flags != NVMAP_HANDLE_INNER_CACHEABLE) {
- if (dir != DMA_FROM_DEVICE)
- outer_clean_range(start, end);
- else
- outer_inv_range(start, end);
- }
+ if (h->flags != NVMAP_HANDLE_INNER_CACHEABLE)
+ outer_cache_maint(op, start, end - start);
+
/* unlock carveout */
nvmap_usecount_dec(h);