From 2fec5f985444f5f1fc91e8dea9d1a32d07e27a90 Mon Sep 17 00:00:00 2001 From: Max Krummenacher Date: Tue, 10 Nov 2015 20:05:42 +0100 Subject: glibc_2.21: force memcpy_arm to improve memcpy performance A meta-toolchain build or a build which does not use the angstrom distro builds glibc_2.21 rather than glibc_linaro-2.20. The patch to not use the slower memcpy implemented for NEON does not apply cleanly for both versions of glibc. This adds a bbappend for glibc_2.21 forcing memcpy_arm. --- ...don-t-use-optimized-for-VFP-NEON-versions.patch | 127 +++++++++++++++++++++ recipes-core/glibc/glibc_2.21.bbappend | 2 + 2 files changed, 129 insertions(+) create mode 100644 recipes-core/glibc/glibc-2.21/0001-memcpy-don-t-use-optimized-for-VFP-NEON-versions.patch create mode 100644 recipes-core/glibc/glibc_2.21.bbappend (limited to 'recipes-core') diff --git a/recipes-core/glibc/glibc-2.21/0001-memcpy-don-t-use-optimized-for-VFP-NEON-versions.patch b/recipes-core/glibc/glibc-2.21/0001-memcpy-don-t-use-optimized-for-VFP-NEON-versions.patch new file mode 100644 index 0000000..dd4964b --- /dev/null +++ b/recipes-core/glibc/glibc-2.21/0001-memcpy-don-t-use-optimized-for-VFP-NEON-versions.patch @@ -0,0 +1,127 @@ +From a086ff98ad0bb996241037689188caf394f6c633 Mon Sep 17 00:00:00 2001 +From: Max Krummenacher +Date: Fri, 12 Jun 2015 13:27:55 +0200 +Subject: [PATCH] memcpy: don't use optimized for VFP/NEON versions + +Tests with the tinymembench tool on a Colibri T30 show the performance +of the standard arm based memcpy to be around 2 times faster than +__memcpy_neon or __memcpy_vfp. + +Tests on Apalis iMX6 confirm this, although running only around 1.3 times faster. +--- + sysdeps/arm/armv7/multiarch/Makefile | 3 --- + sysdeps/arm/armv7/multiarch/ifunc-impl-list.c | 18 ------------------ + sysdeps/arm/armv7/multiarch/memcpy.S | 17 ----------------- + sysdeps/arm/armv7/multiarch/memcpy_neon.S | 9 --------- + sysdeps/arm/armv7/multiarch/memcpy_vfp.S | 7 ------- + 5 files changed, 54 deletions(-) + +diff --git a/sysdeps/arm/armv7/multiarch/Makefile b/sysdeps/arm/armv7/multiarch/Makefile +index e834cc9..e69de29 100644 +--- a/sysdeps/arm/armv7/multiarch/Makefile ++++ b/sysdeps/arm/armv7/multiarch/Makefile +@@ -1,3 +0,0 @@ +-ifeq ($(subdir),string) +-sysdep_routines += memcpy_neon memcpy_vfp +-endif +diff --git a/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c b/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c +index 2515418..322eae6 100644 +--- a/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c ++++ b/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c +@@ -31,25 +31,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + { + size_t i = 0; + +- bool use_neon = true; +-#ifdef __ARM_NEON__ +-# define __memcpy_neon memcpy +-#else +- use_neon = (GLRO(dl_hwcap) & HWCAP_ARM_NEON) != 0; +-#endif +- +-#ifndef __ARM_NEON__ +- bool use_vfp = true; +-# ifdef __SOFTFP__ +- use_vfp = (GLRO(dl_hwcap) & HWCAP_ARM_VFP) != 0; +-# endif +-#endif +- + IFUNC_IMPL (i, name, memcpy, +- IFUNC_IMPL_ADD (array, i, memcpy, use_neon, __memcpy_neon) +-#ifndef __ARM_NEON__ +- IFUNC_IMPL_ADD (array, i, memcpy, use_vfp, __memcpy_vfp) +-#endif + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_arm)); + + return i; +diff --git a/sysdeps/arm/armv7/multiarch/memcpy.S b/sysdeps/arm/armv7/multiarch/memcpy.S +index c4f4e80..9ee4d73 100644 +--- a/sysdeps/arm/armv7/multiarch/memcpy.S ++++ b/sysdeps/arm/armv7/multiarch/memcpy.S +@@ -23,37 +23,20 @@ + #include + + #if IS_IN (libc) +-/* Under __ARM_NEON__, memcpy_neon.S defines the name memcpy. */ +-# ifndef __ARM_NEON__ + .text + ENTRY(memcpy) + .type memcpy, %gnu_indirect_function +-# ifdef __SOFTFP__ + ldr r1, .Lmemcpy_arm +- tst r0, #HWCAP_ARM_VFP +- ldrne r1, .Lmemcpy_vfp +-# else +- ldr r1, .Lmemcpy_vfp +-# endif +- tst r0, #HWCAP_ARM_NEON +- ldrne r1, .Lmemcpy_neon + 1: + add r0, r1, pc + DO_RET(lr) + +-# ifdef __SOFTFP__ + .Lmemcpy_arm: + .long C_SYMBOL_NAME(__memcpy_arm) - 1b - PC_OFS +-# endif +-.Lmemcpy_neon: +- .long C_SYMBOL_NAME(__memcpy_neon) - 1b - PC_OFS +-.Lmemcpy_vfp: +- .long C_SYMBOL_NAME(__memcpy_vfp) - 1b - PC_OFS + + END(memcpy) + + libc_hidden_builtin_def (memcpy) +-#endif /* Not __ARM_NEON__. */ + + /* These versions of memcpy are defined not to clobber any VFP or NEON + registers so they must always call the ARM variant of the memcpy code. */ +diff --git a/sysdeps/arm/armv7/multiarch/memcpy_neon.S b/sysdeps/arm/armv7/multiarch/memcpy_neon.S +index e60d1cc..e69de29 100644 +--- a/sysdeps/arm/armv7/multiarch/memcpy_neon.S ++++ b/sysdeps/arm/armv7/multiarch/memcpy_neon.S +@@ -1,9 +0,0 @@ +-#ifdef __ARM_NEON__ +-/* Under __ARM_NEON__, this file defines memcpy directly. */ +-libc_hidden_builtin_def (memcpy) +-#else +-# define memcpy __memcpy_neon +-#endif +- +-#define MEMCPY_NEON +-#include "memcpy_impl.S" +diff --git a/sysdeps/arm/armv7/multiarch/memcpy_vfp.S b/sysdeps/arm/armv7/multiarch/memcpy_vfp.S +index e008c04..e69de29 100644 +--- a/sysdeps/arm/armv7/multiarch/memcpy_vfp.S ++++ b/sysdeps/arm/armv7/multiarch/memcpy_vfp.S +@@ -1,7 +0,0 @@ +-/* Under __ARM_NEON__, memcpy_neon.S defines memcpy directly +- and the __memcpy_vfp code will never be used. */ +-#ifndef __ARM_NEON__ +-# define MEMCPY_VFP +-# define memcpy __memcpy_vfp +-# include "memcpy_impl.S" +-#endif +-- +1.9.3 + diff --git a/recipes-core/glibc/glibc_2.21.bbappend b/recipes-core/glibc/glibc_2.21.bbappend new file mode 100644 index 0000000..244841d --- /dev/null +++ b/recipes-core/glibc/glibc_2.21.bbappend @@ -0,0 +1,2 @@ +FILESEXTRAPATHS_prepend := "${THISDIR}/glibc-2.21:" +SRC_URI_append = " file://0001-memcpy-don-t-use-optimized-for-VFP-NEON-versions.patch" -- cgit v1.2.3