|author||Nicolas Pitre <firstname.lastname@example.org>||2013-03-12 13:00:42 +0100|
|committer||Max Krummenacher <email@example.com>||2014-07-28 18:19:49 +0200|
ARM: 7670/1: fix the memset fix
Commit 455bd4c430b0 ("ARM: 7668/1: fix memset-related crashes caused by recent GCC (4.7.2) optimizations") attempted to fix a compliance issue with the memset return value. However the memset itself became broken by that patch for misaligned pointers. This fixes the above by branching over the entry code from the misaligned fixup code to avoid reloading the original pointer. Also, because the function entry alignment is wrong in the Thumb mode compilation, that fixup code is moved to the end. While at it, the entry instructions are slightly reworked to help dual issue pipelines. Signed-off-by: Nicolas Pitre <firstname.lastname@example.org> Tested-by: Alexander Holler <email@example.com> Signed-off-by: Russell King <firstname.lastname@example.org>
1 files changed, 13 insertions, 20 deletions
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index d912e73..94b0650 100644
@@ -14,31 +14,15 @@
- .word 0
-1: subs r2, r2, #4 @ 1 do we have enough
- blt 5f @ 1 bytes to align with?
- cmp r3, #2 @ 1
- strltb r1, [ip], #1 @ 1
- strleb r1, [ip], #1 @ 1
- strb r1, [ip], #1 @ 1
- add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
- * The pointer is now aligned and the length is adjusted. Try doing the
- * memset again.
- * Preserve the contents of r0 for the return value.
- mov ip, r0
- ands r3, ip, #3 @ 1 unaligned?
- bne 1b @ 1
+ ands r3, r0, #3 @ 1 unaligned?
+ mov ip, r0 @ preserve r0 as return value
+ bne 6f @ 1
* we know that the pointer in ip is aligned to a word boundary.
- orr r1, r1, r1, lsl #8
+1: orr r1, r1, r1, lsl #8
orr r1, r1, r1, lsl #16
mov r3, r1
cmp r2, #16
@@ -127,4 +111,13 @@ ENTRY(memset)
tst r2, #1
strneb r1, [ip], #1
mov pc, lr
+6: subs r2, r2, #4 @ 1 do we have enough
+ blt 5b @ 1 bytes to align with?
+ cmp r3, #2 @ 1
+ strltb r1, [ip], #1 @ 1
+ strleb r1, [ip], #1 @ 1
+ strb r1, [ip], #1 @ 1
+ add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
+ b 1b