summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorvdumpa <vdumpa@nvidia.com>2011-02-11 21:53:45 -0800
committerVarun Colbert <vcolbert@nvidia.com>2011-03-18 15:57:35 -0800
commita2ec163cba01aec15cce0dda1b989f8b7acf1f4b (patch)
tree8b17e16edacdda19e54c8407a6ed0e30b03a0965 /drivers
parent8e4f39d9d195facc80d89016806bf45690f4f514 (diff)
tegra:video:nvmap: optimize cache_maint operation.
video:tegra:nvmap: Clean whole L1 instead of cleaning by MVA For large allocations, cleaning each page of the allocation can take a significant amount of time. If an allocation that nvmap needs to clean or invalidate out of the cache is significantly larger than the cache, just flush the entire cache by set/ways. bug 788967 Reviewed-on: http://git-master/r/19354 (cherry picked from commit c01c12e63b1476501204152356867aeb5091fb80) tegra:video:nvmap: optimize cache_maint operation. optimize cache_maint operation for carveout and heap memories. flush carveout memory allocations on memory free. Bug 761637 Reviewed-on: http://git-master/r/21205 Conflicts: drivers/video/tegra/nvmap/nvmap_dev.c drivers/video/tegra/nvmap/nvmap_heap.c drivers/video/tegra/nvmap/nvmap_ioctl.c (cherry picked from commit 731df4df5e895e1d4999359d6d5939fc2095f883) tegra:video:nvmap: optimize cache flush for system heap pages. optimize cache flush for pages allocated from system heap. Bug 788187 Reviewed-on: http://git-master/r/21687 (cherry picked from commit 3f318911ad91410aed53c90494210e2b8f74308b) Change-Id: Ia7b90ba0b50acfef1b88dd8095219c51733e027f Reviewed-on: http://git-master/r/23465 Reviewed-by: Kirill Artamonov <kartamonov@nvidia.com> Tested-by: Kirill Artamonov <kartamonov@nvidia.com> Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/video/tegra/nvmap/nvmap_common.h36
-rw-r--r--drivers/video/tegra/nvmap/nvmap_dev.c55
-rw-r--r--drivers/video/tegra/nvmap/nvmap_handle.c21
-rw-r--r--drivers/video/tegra/nvmap/nvmap_heap.c19
-rw-r--r--drivers/video/tegra/nvmap/nvmap_ioctl.c124
5 files changed, 207 insertions, 48 deletions
diff --git a/drivers/video/tegra/nvmap/nvmap_common.h b/drivers/video/tegra/nvmap/nvmap_common.h
new file mode 100644
index 000000000000..20d27fa955bb
--- /dev/null
+++ b/drivers/video/tegra/nvmap/nvmap_common.h
@@ -0,0 +1,36 @@
+/*
+ * drivers/video/tegra/nvmap/nvmap_common.h
+ *
+ * GPU memory management driver for Tegra
+ *
+ * Copyright (c) 2011, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+extern void v7_flush_kern_cache_all(void *);
+extern void v7_clean_kern_cache_all(void *);
+
+#define FLUSH_CLEAN_BY_SET_WAY_THRESHOLD (8 * PAGE_SIZE)
+
+static inline void inner_flush_cache_all(void)
+{
+ on_each_cpu(v7_flush_kern_cache_all, NULL, 1);
+}
+
+static inline void inner_clean_cache_all(void)
+{
+ on_each_cpu(v7_clean_kern_cache_all, NULL, 1);
+}
diff --git a/drivers/video/tegra/nvmap/nvmap_dev.c b/drivers/video/tegra/nvmap/nvmap_dev.c
index ed97228d0d63..5d63dbc4fbc9 100644
--- a/drivers/video/tegra/nvmap/nvmap_dev.c
+++ b/drivers/video/tegra/nvmap/nvmap_dev.c
@@ -3,7 +3,7 @@
*
* User-space interface to nvmap
*
- * Copyright (c) 2010, NVIDIA Corporation.
+ * Copyright (c) 2011, NVIDIA Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -44,6 +44,7 @@
#include "nvmap.h"
#include "nvmap_ioctl.h"
#include "nvmap_mru.h"
+#include "nvmap_common.h"
#define NVMAP_NUM_PTES 64
#define NVMAP_CARVEOUT_KILLER_RETRY_TIME 100 /* msecs */
@@ -250,8 +251,30 @@ unsigned long nvmap_carveout_usage(struct nvmap_client *c,
return 0;
}
-static int nvmap_flush_heap_block(struct nvmap_client *client,
- struct nvmap_heap_block *block, size_t len)
+/*
+ * This routine is used to flush the carveout memory from cache.
+ * Why cache flush is needed for carveout? Consider the case, where a piece of
+ * carveout is allocated as cached and released. After this, if the same memory is
+ * allocated for uncached request and the memory is not flushed out from cache.
+ * In this case, the client might pass this to H/W engine and it could start modify
+ * the memory. As this was cached earlier, it might have some portion of it in cache.
+ * During cpu request to read/write other memory, the cached portion of this memory
+ * might get flushed back to main memory and would cause corruptions, if it happens
+ * after H/W writes data to memory.
+ *
+ * But flushing out the memory blindly on each carveout allocation is redundant.
+ *
+ * In order to optimize the carveout buffer cache flushes, the following
+ * strategy is used.
+ *
+ * The whole Carveout is flushed out from cache during its initialization.
+ * During allocation, carveout buffers are not flushed from cache.
+ * During deallocation, carveout buffers are flushed, if they were allocated as cached.
+ * if they were allocated as uncached/writecombined, no cache flush is needed.
+ * Just draining store buffers is enough.
+ */
+int nvmap_flush_heap_block(struct nvmap_client *client,
+ struct nvmap_heap_block *block, size_t len, unsigned int prot)
{
pte_t **pte;
void *addr;
@@ -259,7 +282,17 @@ static int nvmap_flush_heap_block(struct nvmap_client *client,
unsigned long phys = block->base;
unsigned long end = block->base + len;
- pte = nvmap_alloc_pte(client->dev, &addr);
+ if (prot == NVMAP_HANDLE_UNCACHEABLE || prot == NVMAP_HANDLE_WRITE_COMBINE)
+ goto out;
+
+ if ( len >= FLUSH_CLEAN_BY_SET_WAY_THRESHOLD ) {
+ inner_flush_cache_all();
+ if (prot != NVMAP_HANDLE_INNER_CACHEABLE)
+ outer_flush_range(block->base, block->base + len);
+ goto out;
+ }
+
+ pte = nvmap_alloc_pte((client ? client->dev : nvmap_dev), &addr);
if (IS_ERR(pte))
return PTR_ERR(pte);
@@ -277,9 +310,12 @@ static int nvmap_flush_heap_block(struct nvmap_client *client,
phys = next;
}
- outer_flush_range(block->base, block->base + len);
+ if (prot != NVMAP_HANDLE_INNER_CACHEABLE)
+ outer_flush_range(block->base, block->base + len);
- nvmap_free_pte(client->dev, pte);
+ nvmap_free_pte((client ? client->dev: nvmap_dev), pte);
+out:
+ wmb();
return 0;
}
@@ -421,13 +457,6 @@ struct nvmap_heap_block *do_nvmap_carveout_alloc(struct nvmap_client *client,
block = nvmap_heap_alloc(co_heap->carveout, len,
align, prot, handle);
if (block) {
- /* flush any stale data that may be left in the
- * cache at the block's address, since the new
- * block may be mapped uncached */
- if (nvmap_flush_heap_block(client, block, len)) {
- nvmap_heap_free(block);
- block = NULL;
- }
return block;
}
}
diff --git a/drivers/video/tegra/nvmap/nvmap_handle.c b/drivers/video/tegra/nvmap/nvmap_handle.c
index dc3be30ca2f5..a9150a36cf2a 100644
--- a/drivers/video/tegra/nvmap/nvmap_handle.c
+++ b/drivers/video/tegra/nvmap/nvmap_handle.c
@@ -37,6 +37,7 @@
#include "nvmap.h"
#include "nvmap_mru.h"
+#include "nvmap_common.h"
#define NVMAP_SECURE_HEAPS (NVMAP_HEAP_CARVEOUT_IRAM | NVMAP_HEAP_IOVMM)
#ifdef CONFIG_NVMAP_HIGHMEM_ONLY
@@ -107,7 +108,8 @@ out:
extern void __flush_dcache_page(struct address_space *, struct page *);
-static struct page *nvmap_alloc_pages_exact(gfp_t gfp, size_t size)
+static struct page *nvmap_alloc_pages_exact(gfp_t gfp,
+ size_t size, bool flush_inner)
{
struct page *page, *p, *e;
unsigned int order;
@@ -127,8 +129,10 @@ static struct page *nvmap_alloc_pages_exact(gfp_t gfp, size_t size)
__free_page(p);
e = page + (size >> PAGE_SHIFT);
- for (p = page; p < e; p++)
- __flush_dcache_page(page_mapping(p), p);
+ if (flush_inner) {
+ for (p = page; p < e; p++)
+ __flush_dcache_page(page_mapping(p), p);
+ }
base = page_to_phys(page);
outer_flush_range(base, base + size);
@@ -143,6 +147,7 @@ static int handle_page_alloc(struct nvmap_client *client,
pgprot_t prot;
unsigned int i = 0;
struct page **pages;
+ bool flush_inner = true;
pages = altalloc(nr_page * sizeof(*pages));
if (!pages)
@@ -155,10 +160,14 @@ static int handle_page_alloc(struct nvmap_client *client,
contiguous = true;
#endif
+ if (size >= FLUSH_CLEAN_BY_SET_WAY_THRESHOLD) {
+ inner_flush_cache_all();
+ flush_inner = false;
+ }
h->pgalloc.area = NULL;
if (contiguous) {
struct page *page;
- page = nvmap_alloc_pages_exact(GFP_NVMAP, size);
+ page = nvmap_alloc_pages_exact(GFP_NVMAP, size, flush_inner);
if (!page)
goto fail;
@@ -167,7 +176,8 @@ static int handle_page_alloc(struct nvmap_client *client,
} else {
for (i = 0; i < nr_page; i++) {
- pages[i] = nvmap_alloc_pages_exact(GFP_NVMAP, PAGE_SIZE);
+ pages[i] = nvmap_alloc_pages_exact(GFP_NVMAP, PAGE_SIZE,
+ flush_inner);
if (!pages[i])
goto fail;
}
@@ -193,6 +203,7 @@ fail:
while (i--)
__free_page(pages[i]);
altfree(pages, nr_page * sizeof(*pages));
+ wmb();
return -ENOMEM;
}
diff --git a/drivers/video/tegra/nvmap/nvmap_heap.c b/drivers/video/tegra/nvmap/nvmap_heap.c
index c920048db82b..a0a574d78944 100644
--- a/drivers/video/tegra/nvmap/nvmap_heap.c
+++ b/drivers/video/tegra/nvmap/nvmap_heap.c
@@ -3,7 +3,7 @@
*
* GPU heap allocator.
*
- * Copyright (c) 2010, NVIDIA Corporation.
+ * Copyright (c) 2011, NVIDIA Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -31,6 +31,7 @@
#include <mach/nvmap.h>
#include "nvmap.h"
#include "nvmap_heap.h"
+#include "nvmap_common.h"
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
@@ -887,6 +888,9 @@ struct nvmap_heap_block *nvmap_heap_alloc(struct nvmap_heap *h, size_t len,
struct nvmap_heap *nvmap_block_to_heap(struct nvmap_heap_block *b)
{
+ struct buddy_heap *bh = NULL;
+ struct nvmap_heap *h;
+
if (b->type == BLOCK_BUDDY) {
struct buddy_block *bb;
bb = container_of(b, struct buddy_block, block);
@@ -898,17 +902,24 @@ struct nvmap_heap *nvmap_block_to_heap(struct nvmap_heap_block *b)
}
}
+int nvmap_flush_heap_block(struct nvmap_client *client,
+ struct nvmap_heap_block *block, size_t len, unsigned int prot);
+
/* nvmap_heap_free: frees block b*/
void nvmap_heap_free(struct nvmap_heap_block *b)
{
struct buddy_heap *bh = NULL;
struct nvmap_heap *h = nvmap_block_to_heap(b);
+ struct list_block *lb;
mutex_lock(&h->lock);
if (b->type == BLOCK_BUDDY)
bh = do_buddy_free(b);
- else
+ else {
+ lb = container_of(b, struct list_block, block);
+ nvmap_flush_heap_block(NULL, b, lb->size, lb->mem_prot);
do_heap_free(b);
+ }
if (bh) {
list_del(&bh->buddy_list);
@@ -1008,6 +1019,10 @@ struct nvmap_heap *nvmap_heap_create(struct device *parent, const char *name,
l->orig_addr = base;
list_add_tail(&l->free_list, &h->free_list);
list_add_tail(&l->all_list, &h->all_list);
+
+ inner_flush_cache_all();
+ outer_flush_range(base, base + len);
+ wmb();
return h;
fail_register:
diff --git a/drivers/video/tegra/nvmap/nvmap_ioctl.c b/drivers/video/tegra/nvmap/nvmap_ioctl.c
index fb8c5ff00bdd..fc367c89ad45 100644
--- a/drivers/video/tegra/nvmap/nvmap_ioctl.c
+++ b/drivers/video/tegra/nvmap/nvmap_ioctl.c
@@ -3,7 +3,7 @@
*
* User-space interface to nvmap
*
- * Copyright (c) 2010, NVIDIA Corporation.
+ * Copyright (c) 2011, NVIDIA Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -35,6 +35,7 @@
#include "nvmap_ioctl.h"
#include "nvmap.h"
+#include "nvmap_common.h"
static ssize_t rw_handle(struct nvmap_client *client, struct nvmap_handle *h,
int is_read, unsigned long h_offs,
@@ -477,10 +478,91 @@ int nvmap_ioctl_free(struct file *filp, unsigned long arg)
return 0;
}
+static void inner_cache_maint(unsigned int op, void *vaddr, size_t size)
+{
+ if (op == NVMAP_CACHE_OP_WB_INV)
+ dmac_flush_range(vaddr, vaddr + size);
+ else if (op == NVMAP_CACHE_OP_INV)
+ dmac_map_area(vaddr, size, DMA_FROM_DEVICE);
+ else
+ dmac_map_area(vaddr, size, DMA_TO_DEVICE);
+}
+
+static void outer_cache_maint(unsigned int op, unsigned long paddr, size_t size)
+{
+ if (op == NVMAP_CACHE_OP_WB_INV)
+ outer_flush_range(paddr, paddr + size);
+ else if (op == NVMAP_CACHE_OP_INV)
+ outer_inv_range(paddr, paddr + size);
+ else
+ outer_clean_range(paddr, paddr + size);
+}
+
+static void heap_page_cache_maint(struct nvmap_client *client,
+ struct nvmap_handle *h, unsigned long start, unsigned long end,
+ unsigned int op, bool inner, bool outer, pte_t **pte,
+ unsigned long kaddr, pgprot_t prot)
+{
+ struct page *page;
+ unsigned long paddr;
+ unsigned long next;
+ unsigned long off;
+ size_t size;
+
+ while (start < end) {
+ page = h->pgalloc.pages[start >> PAGE_SHIFT];
+ next = min(((start + PAGE_SIZE) & PAGE_MASK), end);
+ off = start & ~PAGE_MASK;
+ size = next - start;
+ paddr = page_to_phys(page) + off;
+
+ if (inner) {
+ void *vaddr = (void *)kaddr + off;
+ BUG_ON(!pte);
+ BUG_ON(!kaddr);
+ set_pte_at(&init_mm, kaddr, *pte,
+ pfn_pte(__phys_to_pfn(paddr), prot));
+ flush_tlb_kernel_page(kaddr);
+ inner_cache_maint(op, vaddr, size);
+ }
+
+ if (outer)
+ outer_cache_maint(op, paddr, size);
+ start = next;
+ }
+}
+
+static bool fast_cache_maint(struct nvmap_client *client, struct nvmap_handle *h,
+ unsigned long start, unsigned long end, unsigned int op)
+{
+ int ret = false;
+
+ if ( (op == NVMAP_CACHE_OP_INV) ||
+ ((end - start) < FLUSH_CLEAN_BY_SET_WAY_THRESHOLD) )
+ goto out;
+
+ if (op == NVMAP_CACHE_OP_WB_INV) {
+ inner_flush_cache_all();
+ } else if (op == NVMAP_CACHE_OP_WB) {
+ inner_clean_cache_all();
+ }
+
+ if (h->heap_pgalloc && (h->flags != NVMAP_HANDLE_INNER_CACHEABLE)) {
+ heap_page_cache_maint(client, h, start, end, op,
+ false, true, NULL, 0, 0);
+ } else if (h->flags != NVMAP_HANDLE_INNER_CACHEABLE) {
+ start += h->carveout->base;
+ end += h->carveout->base;
+ outer_cache_maint(op, start, end - start);
+ }
+ ret = true;
+out:
+ return ret;
+}
+
static int cache_maint(struct nvmap_client *client, struct nvmap_handle *h,
unsigned long start, unsigned long end, unsigned int op)
{
- enum dma_data_direction dir;
pgprot_t prot;
pte_t **pte = NULL;
unsigned long kaddr;
@@ -501,26 +583,8 @@ static int cache_maint(struct nvmap_client *client, struct nvmap_handle *h,
start == end)
goto out;
- if (WARN_ON_ONCE(op == NVMAP_CACHE_OP_WB_INV))
- dir = DMA_BIDIRECTIONAL;
- else if (op == NVMAP_CACHE_OP_WB)
- dir = DMA_TO_DEVICE;
- else
- dir = DMA_FROM_DEVICE;
-
- if (h->heap_pgalloc) {
- while (start < end) {
- unsigned long next = (start + PAGE_SIZE) & PAGE_MASK;
- struct page *page;
-
- page = h->pgalloc.pages[start >> PAGE_SHIFT];
- next = min(next, end);
- __dma_page_cpu_to_dev(page, start & ~PAGE_MASK,
- next - start, dir);
- start = next;
- }
+ if (fast_cache_maint(client, h, start, end, op))
goto out;
- }
prot = nvmap_pgprot(h, pgprot_kernel);
pte = nvmap_alloc_pte(client->dev, (void **)&kaddr);
@@ -530,6 +594,13 @@ static int cache_maint(struct nvmap_client *client, struct nvmap_handle *h,
goto out;
}
+ if (h->heap_pgalloc) {
+ heap_page_cache_maint(client, h, start, end, op, true,
+ (h->flags == NVMAP_HANDLE_INNER_CACHEABLE) ? false : true,
+ pte, kaddr, prot);
+ goto out;
+ }
+
if (start > h->size || end > h->size) {
nvmap_warn(client, "cache maintenance outside handle\n");
return -EINVAL;
@@ -552,16 +623,13 @@ static int cache_maint(struct nvmap_client *client, struct nvmap_handle *h,
pfn_pte(__phys_to_pfn(loop), prot));
flush_tlb_kernel_page(kaddr);
- dmac_map_area(base, next - loop, dir);
+ inner_cache_maint(op, base, next - loop);
loop = next;
}
- if (h->flags != NVMAP_HANDLE_INNER_CACHEABLE) {
- if (dir != DMA_FROM_DEVICE)
- outer_clean_range(start, end);
- else
- outer_inv_range(start, end);
- }
+ if (h->flags != NVMAP_HANDLE_INNER_CACHEABLE)
+ outer_cache_maint(op, start, end - start);
+
/* unlock carveout */
nvmap_usecount_dec(h);