summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames Hogan <jhogan@kernel.org>2017-12-07 07:20:46 +0000
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2018-03-03 10:19:46 +0100
commitcacdb0162489b42f6b46e132b4d08860aa9a07c9 (patch)
tree2ea3db577ac159519fb7464ac450af9a97c20942
parent37d866a9e5656b2c4e32817a8cb0ff212ad227e9 (diff)
MIPS: Implement __multi3 for GCC7 MIPS64r6 builds
commit ebabcf17bcd7ce968b1631ebe08236275698f39b upstream. GCC7 is a bit too eager to generate suboptimal __multi3 calls (128bit multiply with 128bit result) for MIPS64r6 builds, even in code which doesn't explicitly use 128bit types, such as the following: unsigned long func(unsigned long a, unsigned long b) { return a > (~0UL) / b; } Which GCC rearanges to: return (unsigned __int128)a * (unsigned __int128)b > 0xffffffffffffffff; Therefore implement __multi3, but only for MIPS64r6 with GCC7 as under normal circumstances we wouldn't expect any calls to __multi3 to be generated from kernel code. Reported-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com> Signed-off-by: James Hogan <jhogan@kernel.org> Tested-by: Waldemar Brodkorb <wbx@openadk.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Maciej W. Rozycki <macro@mips.com> Cc: Matthew Fortune <matthew.fortune@mips.com> Cc: Florian Fainelli <florian@openwrt.org> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17890/ Cc: Guenter Roeck <linux@roeck-us.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--arch/mips/lib/Makefile3
-rw-r--r--arch/mips/lib/libgcc.h17
-rw-r--r--arch/mips/lib/multi3.c54
3 files changed, 73 insertions, 1 deletions
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index 0344e575f522..fba4ca56e46a 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -15,4 +15,5 @@ obj-$(CONFIG_CPU_R3000) += r3k_dump_tlb.o
obj-$(CONFIG_CPU_TX39XX) += r3k_dump_tlb.o
# libgcc-style stuff needed in the kernel
-obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o ucmpdi2.o
+obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o multi3.o \
+ ucmpdi2.o
diff --git a/arch/mips/lib/libgcc.h b/arch/mips/lib/libgcc.h
index 05909d58e2fe..56ea0df60a44 100644
--- a/arch/mips/lib/libgcc.h
+++ b/arch/mips/lib/libgcc.h
@@ -9,10 +9,18 @@ typedef int word_type __attribute__ ((mode (__word__)));
struct DWstruct {
int high, low;
};
+
+struct TWstruct {
+ long long high, low;
+};
#elif defined(__LITTLE_ENDIAN)
struct DWstruct {
int low, high;
};
+
+struct TWstruct {
+ long long low, high;
+};
#else
#error I feel sick.
#endif
@@ -22,4 +30,13 @@ typedef union {
long long ll;
} DWunion;
+#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6)
+typedef int ti_type __attribute__((mode(TI)));
+
+typedef union {
+ struct TWstruct s;
+ ti_type ti;
+} TWunion;
+#endif
+
#endif /* __ASM_LIBGCC_H */
diff --git a/arch/mips/lib/multi3.c b/arch/mips/lib/multi3.c
new file mode 100644
index 000000000000..111ad475aa0c
--- /dev/null
+++ b/arch/mips/lib/multi3.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/export.h>
+
+#include "libgcc.h"
+
+/*
+ * GCC 7 suboptimally generates __multi3 calls for mips64r6, so for that
+ * specific case only we'll implement it here.
+ *
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82981
+ */
+#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ == 7)
+
+/* multiply 64-bit values, low 64-bits returned */
+static inline long long notrace dmulu(long long a, long long b)
+{
+ long long res;
+
+ asm ("dmulu %0,%1,%2" : "=r" (res) : "r" (a), "r" (b));
+ return res;
+}
+
+/* multiply 64-bit unsigned values, high 64-bits of 128-bit result returned */
+static inline long long notrace dmuhu(long long a, long long b)
+{
+ long long res;
+
+ asm ("dmuhu %0,%1,%2" : "=r" (res) : "r" (a), "r" (b));
+ return res;
+}
+
+/* multiply 128-bit values, low 128-bits returned */
+ti_type notrace __multi3(ti_type a, ti_type b)
+{
+ TWunion res, aa, bb;
+
+ aa.ti = a;
+ bb.ti = b;
+
+ /*
+ * a * b = (a.lo * b.lo)
+ * + 2^64 * (a.hi * b.lo + a.lo * b.hi)
+ * [+ 2^128 * (a.hi * b.hi)]
+ */
+ res.s.low = dmulu(aa.s.low, bb.s.low);
+ res.s.high = dmuhu(aa.s.low, bb.s.low);
+ res.s.high += dmulu(aa.s.high, bb.s.low);
+ res.s.high += dmulu(aa.s.low, bb.s.high);
+
+ return res.ti;
+}
+EXPORT_SYMBOL(__multi3);
+
+#endif /* 64BIT && CPU_MIPSR6 && GCC7 */