summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorvdumpa <vdumpa@nvidia.com>2011-02-11 21:53:45 -0800
committerVarun Colbert <vcolbert@nvidia.com>2011-03-18 15:57:35 -0800
commita2ec163cba01aec15cce0dda1b989f8b7acf1f4b (patch)
tree8b17e16edacdda19e54c8407a6ed0e30b03a0965 /arch
parent8e4f39d9d195facc80d89016806bf45690f4f514 (diff)
tegra:video:nvmap: optimize cache_maint operation.
video:tegra:nvmap: Clean whole L1 instead of cleaning by MVA For large allocations, cleaning each page of the allocation can take a significant amount of time. If an allocation that nvmap needs to clean or invalidate out of the cache is significantly larger than the cache, just flush the entire cache by set/ways. bug 788967 Reviewed-on: http://git-master/r/19354 (cherry picked from commit c01c12e63b1476501204152356867aeb5091fb80) tegra:video:nvmap: optimize cache_maint operation. optimize cache_maint operation for carveout and heap memories. flush carveout memory allocations on memory free. Bug 761637 Reviewed-on: http://git-master/r/21205 Conflicts: drivers/video/tegra/nvmap/nvmap_dev.c drivers/video/tegra/nvmap/nvmap_heap.c drivers/video/tegra/nvmap/nvmap_ioctl.c (cherry picked from commit 731df4df5e895e1d4999359d6d5939fc2095f883) tegra:video:nvmap: optimize cache flush for system heap pages. optimize cache flush for pages allocated from system heap. Bug 788187 Reviewed-on: http://git-master/r/21687 (cherry picked from commit 3f318911ad91410aed53c90494210e2b8f74308b) Change-Id: Ia7b90ba0b50acfef1b88dd8095219c51733e027f Reviewed-on: http://git-master/r/23465 Reviewed-by: Kirill Artamonov <kartamonov@nvidia.com> Tested-by: Kirill Artamonov <kartamonov@nvidia.com> Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/mm/cache-v7.S55
1 files changed, 41 insertions, 14 deletions
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 37c8157e116e..c8c823953a34 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -18,27 +18,28 @@
#include "proc-macros.S"
/*
- * v7_flush_dcache_all()
+ * v7_op_dcache_all op
*
- * Flush the whole D-cache.
+ * op=c14, Flush the whole D-cache.
+ * op=c10, Clean the whole D-cache.
*
* Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
*
* - mm - mm_struct describing address space
*/
-ENTRY(v7_flush_dcache_all)
+.macro v7_op_dcache_all op @ op=c10 clean, op=c14 flush
dmb @ ensure ordering with previous memory accesses
mrc p15, 1, r0, c0, c0, 1 @ read clidr
ands r3, r0, #0x7000000 @ extract loc from clidr
mov r3, r3, lsr #23 @ left align loc bit field
- beq finished @ if loc is 0, then no need to clean
+ beq 1005f @ if loc is 0, then no need to clean
mov r10, #0 @ start clean at cache level 0
-loop1:
+1001:
add r2, r10, r10, lsr #1 @ work out 3x current cache level
mov r1, r0, lsr r2 @ extract cache type bits from clidr
and r1, r1, #7 @ mask of the bits for current cache only
cmp r1, #2 @ see what cache we have at this level
- blt skip @ skip if no cache, or just i-cache
+ blt 1004f @ skip if no cache, or just i-cache
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
isb @ isb to sych the new cssr&csidr
mrc p15, 1, r1, c0, c0, 0 @ read the new csidr
@@ -49,32 +50,40 @@ loop1:
clz r5, r4 @ find bit position of way size increment
ldr r7, =0x7fff
ands r7, r7, r1, lsr #13 @ extract max number of the index size
-loop2:
+1002:
mov r9, r4 @ create working copy of max way size
-loop3:
+1003:
ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11
THUMB( lsl r6, r9, r5 )
THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11
ARM( orr r11, r11, r7, lsl r2 ) @ factor index number into r11
THUMB( lsl r6, r7, r2 )
THUMB( orr r11, r11, r6 ) @ factor index number into r11
- mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way
+ mcr p15, 0, r11, c7, \op, 2 @ op=c10/c14, clean/flush by set/way
subs r9, r9, #1 @ decrement the way
- bge loop3
+ bge 1003b
subs r7, r7, #1 @ decrement the index
- bge loop2
-skip:
+ bge 1002b
+1004:
add r10, r10, #2 @ increment cache number
cmp r3, r10
- bgt loop1
-finished:
+ bgt 1001b
+1005:
mov r10, #0 @ swith back to cache level 0
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
dsb
isb
mov pc, lr
+.endm
+
+ENTRY(v7_flush_dcache_all)
+ v7_op_dcache_all c14
ENDPROC(v7_flush_dcache_all)
+ENTRY(v7_clean_dcache_all)
+ v7_op_dcache_all c10
+ENDPROC(v7_clean_dcache_all)
+
/*
* v7_flush_cache_all()
*
@@ -102,6 +111,24 @@ ENTRY(v7_flush_kern_cache_all)
ENDPROC(v7_flush_kern_cache_all)
/*
+ * v7_clean_kern_cache_all()
+ */
+ENTRY(v7_clean_kern_cache_all)
+ ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} )
+ THUMB( stmfd sp!, {r4-r7, r9-r11, lr} )
+ bl v7_clean_dcache_all
+ mov r0, #0
+#ifdef CONFIG_SMP
+ mcr p15, 0, r0, c7, c1, 0 @ invalidate I-cache inner shareable
+#else
+ mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate
+#endif
+ ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} )
+ THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} )
+ mov pc, lr
+ENDPROC(v7_clean_kern_cache_all)
+
+/*
* v7_flush_cache_all()
*
* Flush all TLB entries in a particular address space