summaryrefslogtreecommitdiff
path: root/recipes-core/glibc/glibc/0001-memcpy-don-t-use-optimized-for-VFP-NEON-versions.patch
blob: 75be06ff4be853a58277c3fed84986b971eb5f1a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
From a086ff98ad0bb996241037689188caf394f6c633 Mon Sep 17 00:00:00 2001
From: Max Krummenacher <max.krummenacher@toradex.com>
Date: Fri, 12 Jun 2015 13:27:55 +0200
Subject: [PATCH] memcpy: don't use the versions optimized for VFP/NEON

Tests with the tinymembench tool on a Colibri T30 show that the standard
ARM memcpy is around 2 times faster than __memcpy_neon or
__memcpy_vfp.

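Tests on an Apalis iMX6 confirm this, although there the standard memcpy
is only around 1.3 times faster.

Therefore drop the NEON and VFP variants from the multiarch build and
make the memcpy ifunc resolver always select __memcpy_arm.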
---
 sysdeps/arm/armv7/multiarch/Makefile          |  3 ---
 sysdeps/arm/armv7/multiarch/ifunc-impl-list.c | 18 ------------------
 sysdeps/arm/armv7/multiarch/memcpy.S          | 17 -----------------
 sysdeps/arm/armv7/multiarch/memcpy_neon.S     |  9 ---------
 sysdeps/arm/armv7/multiarch/memcpy_vfp.S      |  7 -------
 5 files changed, 54 deletions(-)

diff --git a/sysdeps/arm/armv7/multiarch/Makefile b/sysdeps/arm/armv7/multiarch/Makefile
index e834cc9..e69de29 100644
--- a/sysdeps/arm/armv7/multiarch/Makefile
+++ b/sysdeps/arm/armv7/multiarch/Makefile
@@ -1,3 +0,0 @@
-ifeq ($(subdir),string)
-sysdep_routines += memcpy_neon memcpy_vfp
-endif
diff --git a/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c b/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c
index 2515418..322eae6 100644
--- a/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c
+++ b/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c
@@ -31,25 +31,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 {
   size_t i = 0;
 
-  bool use_neon = true;
-#ifdef __ARM_NEON__
-# define __memcpy_neon	memcpy
-#else
-  use_neon = (GLRO(dl_hwcap) & HWCAP_ARM_NEON) != 0;
-#endif
-
-#ifndef __ARM_NEON__
-  bool use_vfp = true;
-# ifdef __SOFTFP__
-  use_vfp = (GLRO(dl_hwcap) & HWCAP_ARM_VFP) != 0;
-# endif
-#endif
-
   IFUNC_IMPL (i, name, memcpy,
-	      IFUNC_IMPL_ADD (array, i, memcpy, use_neon, __memcpy_neon)
-#ifndef __ARM_NEON__
-	      IFUNC_IMPL_ADD (array, i, memcpy, use_vfp, __memcpy_vfp)
-#endif
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_arm));
 
   return i;
diff --git a/sysdeps/arm/armv7/multiarch/memcpy.S b/sysdeps/arm/armv7/multiarch/memcpy.S
index c4f4e80..9ee4d73 100644
--- a/sysdeps/arm/armv7/multiarch/memcpy.S
+++ b/sysdeps/arm/armv7/multiarch/memcpy.S
@@ -23,37 +23,20 @@
 #include <rtld-global-offsets.h>
 
 #ifndef NOT_IN_libc
-/* Under __ARM_NEON__, memcpy_neon.S defines the name memcpy.  */
-# ifndef __ARM_NEON__
 	.text
 ENTRY(memcpy)
 	.type	memcpy, %gnu_indirect_function
-# ifdef __SOFTFP__
 	ldr	r1, .Lmemcpy_arm
-	tst	r0, #HWCAP_ARM_VFP
-	ldrne	r1, .Lmemcpy_vfp
-# else
-	ldr	r1, .Lmemcpy_vfp
-# endif
-	tst	r0, #HWCAP_ARM_NEON
-	ldrne	r1, .Lmemcpy_neon
 1:
 	add	r0, r1, pc
 	DO_RET(lr)
 
-# ifdef __SOFTFP__
 .Lmemcpy_arm:
 	.long	C_SYMBOL_NAME(__memcpy_arm) - 1b - PC_OFS
-# endif
-.Lmemcpy_neon:
-	.long	C_SYMBOL_NAME(__memcpy_neon) - 1b - PC_OFS
-.Lmemcpy_vfp:
-	.long	C_SYMBOL_NAME(__memcpy_vfp) - 1b - PC_OFS
 
 END(memcpy)
 
 libc_hidden_builtin_def (memcpy)
-#endif  /* Not __ARM_NEON__.  */
 
 /* These versions of memcpy are defined not to clobber any VFP or NEON
    registers so they must always call the ARM variant of the memcpy code.  */
diff --git a/sysdeps/arm/armv7/multiarch/memcpy_neon.S b/sysdeps/arm/armv7/multiarch/memcpy_neon.S
index e60d1cc..e69de29 100644
--- a/sysdeps/arm/armv7/multiarch/memcpy_neon.S
+++ b/sysdeps/arm/armv7/multiarch/memcpy_neon.S
@@ -1,9 +0,0 @@
-#ifdef __ARM_NEON__
-/* Under __ARM_NEON__, this file defines memcpy directly.  */
-libc_hidden_builtin_def (memcpy)
-#else
-# define memcpy __memcpy_neon
-#endif
-
-#define MEMCPY_NEON
-#include "memcpy_impl.S"
diff --git a/sysdeps/arm/armv7/multiarch/memcpy_vfp.S b/sysdeps/arm/armv7/multiarch/memcpy_vfp.S
index e008c04..e69de29 100644
--- a/sysdeps/arm/armv7/multiarch/memcpy_vfp.S
+++ b/sysdeps/arm/armv7/multiarch/memcpy_vfp.S
@@ -1,7 +0,0 @@
-/* Under __ARM_NEON__, memcpy_neon.S defines memcpy directly
-   and the __memcpy_vfp code will never be used.  */
-#ifndef __ARM_NEON__
-# define MEMCPY_VFP
-# define memcpy __memcpy_vfp
-# include "memcpy_impl.S"
-#endif
-- 
1.9.3