diff options
author | vdumpa <vdumpa@nvidia.com> | 2011-02-11 21:53:45 -0800 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2011-11-30 21:44:54 -0800 |
commit | b603a4119e1d42d9c24c189ce33b3709f9158042 (patch) | |
tree | 6d8a2ed44483e5eee96b4a915707717b193a6742 /arch/arm/mm | |
parent | 0276bde775b680204e2f27a9764790865dc83b7a (diff) |
tegra:video:nvmap: optimize cache_maint operation.
video:tegra:nvmap: Clean whole L1 instead of cleaning by MVA
For large allocations, cleaning each page of the allocation can
take a significant amount of time. If an allocation that nvmap needs
to clean or invalidate out of the cache is significantly larger than
the cache, just flush the entire cache by set/ways.
bug 788967
Reviewed-on: http://git-master/r/19354
(cherry picked from commit c01c12e63b1476501204152356867aeb5091fb80)
tegra:video:nvmap: optimize cache_maint operation.
optimize cache_maint operation for carveout and heap memories.
flush carveout memory allocations on memory free.
Bug 761637
Reviewed-on: http://git-master/r/21205
Conflicts:
drivers/video/tegra/nvmap/nvmap_dev.c
drivers/video/tegra/nvmap/nvmap_heap.c
drivers/video/tegra/nvmap/nvmap_ioctl.c
(cherry picked from commit 731df4df5e895e1d4999359d6d5939fc2095f883)
tegra:video:nvmap: optimize cache flush for system heap pages.
optimize cache flush for pages allocated from system heap.
Bug 788187
Reviewed-on: http://git-master/r/21687
(cherry picked from commit 3f318911ad91410aed53c90494210e2b8f74308b)
Original-Change-Id: Ia7b90ba0b50acfef1b88dd8095219c51733e027f
Reviewed-on: http://git-master/r/23465
Reviewed-by: Kirill Artamonov <kartamonov@nvidia.com>
Tested-by: Kirill Artamonov <kartamonov@nvidia.com>
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
Rebase-Id: R04f618f88ed1d2c7a680d51a8c5113f42de3f667
Diffstat (limited to 'arch/arm/mm')
-rw-r--r-- | arch/arm/mm/cache-v7.S | 55 |
1 files changed, 41 insertions, 14 deletions
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 07c4bc8ea0a4..963325eb083e 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -33,27 +33,28 @@ ENTRY(v7_flush_icache_all)
 ENDPROC(v7_flush_icache_all)

 /*
- *	v7_flush_dcache_all()
+ *	v7_op_dcache_all op
  *
- *	Flush the whole D-cache.
+ *	op=c14, Flush the whole D-cache.
+ *	op=c10, Clean the whole D-cache.
  *
  *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
  *
  *	- mm    - mm_struct describing address space
  */
-ENTRY(v7_flush_dcache_all)
+.macro	v7_op_dcache_all op	@ op=c10 clean, op=c14 flush
 	dmb					@ ensure ordering with previous memory accesses
 	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
 	ands	r3, r0, #0x7000000		@ extract loc from clidr
 	mov	r3, r3, lsr #23			@ left align loc bit field
-	beq	finished			@ if loc is 0, then no need to clean
+	beq	1005f				@ if loc is 0, then no need to clean
 	mov	r10, #0				@ start clean at cache level 0
-loop1:
+1001:
 	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
 	mov	r1, r0, lsr r2			@ extract cache type bits from clidr
 	and	r1, r1, #7			@ mask of the bits for current cache only
 	cmp	r1, #2				@ see what cache we have at this level
-	blt	skip				@ skip if no cache, or just i-cache
+	blt	1004f				@ skip if no cache, or just i-cache
 	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
 	isb					@ isb to sych the new cssr&csidr
 	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
@@ -64,32 +65,40 @@ loop1:
 	clz	r5, r4				@ find bit position of way size increment
 	ldr	r7, =0x7fff
 	ands	r7, r7, r1, lsr #13		@ extract max number of the index size
-loop2:
+1002:
 	mov	r9, r4				@ create working copy of max way size
-loop3:
+1003:
 ARM(	orr	r11, r10, r9, lsl r5	)	@ factor way and cache number into r11
 THUMB(	lsl	r6, r9, r5		)
 THUMB(	orr	r11, r10, r6		)	@ factor way and cache number into r11
 ARM(	orr	r11, r11, r7, lsl r2	)	@ factor index number into r11
 THUMB(	lsl	r6, r7, r2		)
 THUMB(	orr	r11, r11, r6		)	@ factor index number into r11
-	mcr	p15, 0, r11, c7, c14, 2		@ clean & invalidate by set/way
+	mcr	p15, 0, r11, c7, \op, 2		@ op=c10/c14, clean/flush by set/way
 	subs	r9, r9, #1			@ decrement the way
-	bge	loop3
+	bge	1003b
 	subs	r7, r7, #1			@ decrement the index
-	bge	loop2
-skip:
+	bge	1002b
+1004:
 	add	r10, r10, #2			@ increment cache number
 	cmp	r3, r10
-	bgt	loop1
-finished:
+	bgt	1001b
+1005:
 	mov	r10, #0				@ swith back to cache level 0
 	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
 	dsb
 	isb
 	mov	pc, lr
+.endm
+
+ENTRY(v7_flush_dcache_all)
+	v7_op_dcache_all c14
 ENDPROC(v7_flush_dcache_all)

+ENTRY(v7_clean_dcache_all)
+	v7_op_dcache_all c10
+ENDPROC(v7_clean_dcache_all)
+
 /*
  *	v7_flush_cache_all()
  *
@@ -114,6 +123,24 @@ ENTRY(v7_flush_kern_cache_all)
 ENDPROC(v7_flush_kern_cache_all)

 /*
+ *	v7_clean_kern_cache_all()
+ */
+ENTRY(v7_clean_kern_cache_all)
+ ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
+ THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
+	bl	v7_clean_dcache_all
+	mov	r0, #0
+#ifdef CONFIG_SMP
+	mcr	p15, 0, r0, c7, c1, 0		@ invalidate I-cache inner shareable
+#else
+	mcr	p15, 0, r0, c7, c5, 0		@ I+BTB cache invalidate
+#endif
+ ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
+ THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
+	mov	pc, lr
+ENDPROC(v7_clean_kern_cache_all)
+
+/*
  *	v7_flush_cache_all()
  *
  *	Flush all TLB entries in a particular address space