From 71ae8aac3e198c6f3577cb7ad3a17f6128e97bfa Mon Sep 17 00:00:00 2001
From: Francesco Fusco <ffusco@redhat.com>
Date: Thu, 12 Dec 2013 16:09:05 +0100
Subject: lib: introduce arch optimized hash library

We introduce a new hashing library that is meant to be used in
the contexts where speed is more important than uniformity of the
hashed values. The hash library leverages architecture specific
implementation to achieve high performance and fall backs to
jhash() for the generic case.

On Intel-based x86 architectures, the library can exploit the crc32l
instruction, part of the Intel SSE4.2 instruction set, if the
instruction is supported by the processor. This implementation
is twice as fast as the jhash() implementation on an i7 processor.

Additional architectures, such as Arm64 provide instructions for
accelerating the computation of CRC, so they could be added as well
in follow-up work.

Signed-off-by: Francesco Fusco <ffusco@redhat.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Thomas Graf <tgraf@redhat.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/Makefile |  2 +-
 lib/hash.c   | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)
 create mode 100644 lib/hash.c

(limited to 'lib')

diff --git a/lib/Makefile b/lib/Makefile
index a459c31e8c6b..d0f79c547d97 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
 	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
-	 percpu-refcount.o percpu_ida.o
+	 percpu-refcount.o percpu_ida.o hash.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
 obj-y += kstrtox.o
diff --git a/lib/hash.c b/lib/hash.c
new file mode 100644
index 000000000000..b89f06a2d606
--- /dev/null
+++ b/lib/hash.c
@@ -0,0 +1,38 @@
+/* General purpose hashing library
+ *
+ * That's a start of a kernel hashing library, which can be extended
+ * with further algorithms in future. arch_fast_hash{2,}() will
+ * eventually resolve to an architecture optimized implementation.
+ *
+ * Copyright 2013 Francesco Fusco <ffusco@redhat.com>
+ * Copyright 2013 Daniel Borkmann <dborkman@redhat.com>
+ * Copyright 2013 Thomas Graf <tgraf@redhat.com>
+ * Licensed under the GNU General Public License, version 2.0 (GPLv2)
+ */
+
+#include <linux/jhash.h>
+#include <linux/hash.h>
+
+static struct fast_hash_ops arch_hash_ops __read_mostly = {
+	.hash  = jhash,
+	.hash2 = jhash2,
+};
+
+u32 arch_fast_hash(const void *data, u32 len, u32 seed)
+{
+	return arch_hash_ops.hash(data, len, seed);
+}
+EXPORT_SYMBOL_GPL(arch_fast_hash);
+
+u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed)
+{
+	return arch_hash_ops.hash2(data, len, seed);
+}
+EXPORT_SYMBOL_GPL(arch_fast_hash2);
+
+static int __init hashlib_init(void)
+{
+	setup_arch_fast_hash(&arch_hash_ops);
+	return 0;
+}
+early_initcall(hashlib_init);
-- 
cgit v1.2.3


From 237217546d44fe06c16b8895eecaef22f18ba5ee Mon Sep 17 00:00:00 2001
From: Francesco Fusco <ffusco@redhat.com>
Date: Wed, 18 Dec 2013 16:05:48 +0100
Subject: lib: hash: follow-up fixups for arch hash

This patch adds the include file to pull in __read_mostly on some
architectures e.g. ppc and also fixes up signatures in generic
asm.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Francesco Fusco <ffusco@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/hash.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'lib')

diff --git a/lib/hash.c b/lib/hash.c
index b89f06a2d606..fea973f4bd57 100644
--- a/lib/hash.c
+++ b/lib/hash.c
@@ -12,6 +12,7 @@
 
 #include <linux/jhash.h>
 #include <linux/hash.h>
+#include <linux/cache.h>
 
 static struct fast_hash_ops arch_hash_ops __read_mostly = {
 	.hash  = jhash,
-- 
cgit v1.2.3


From 03144b5869d2921dafbea477246aeb5d4eb5fc38 Mon Sep 17 00:00:00 2001
From: Michael Dalton <mwdalton@google.com>
Date: Thu, 16 Jan 2014 22:23:29 -0800
Subject: lib: Ensure EWMA does not store wrong intermediate values

To ensure ewma_read() without a lock returns a valid but possibly
out of date average, modify ewma_add() by using ACCESS_ONCE to prevent
intermediate wrong values from being written to avg->internal.

Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Michael Dalton <mwdalton@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/average.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/average.c b/lib/average.c
index 99a67e662b3c..114d1beae0c7 100644
--- a/lib/average.c
+++ b/lib/average.c
@@ -53,8 +53,10 @@ EXPORT_SYMBOL(ewma_init);
  */
 struct ewma *ewma_add(struct ewma *avg, unsigned long val)
 {
-	avg->internal = avg->internal  ?
-		(((avg->internal << avg->weight) - avg->internal) +
+	unsigned long internal = ACCESS_ONCE(avg->internal);
+
+	ACCESS_ONCE(avg->internal) = internal ?
+		(((internal << avg->weight) - internal) +
 			(val << avg->factor)) >> avg->weight :
 		(val << avg->factor);
 	return avg;
-- 
cgit v1.2.3


From 82ef3d5d5f3ffd757c960693c4fe7a0051211849 Mon Sep 17 00:00:00 2001
From: Weilong Chen <chenweilong@huawei.com>
Date: Thu, 16 Jan 2014 17:24:31 +0800
Subject: net: fix "queues" uevent between network namespaces

When I create a new namespace with 'ip netns add net0', or add/remove
new links in a namespace with 'ip link add/delete type veth', rx/tx
queues events can be got in all namespaces. That is because rx/tx queue
ktypes do not have namespace support, and their kobj parents are setted to
NULL. This patch is to fix it.

Reported-by: Libo Chen <chenlibo@huawei.com>
Signed-off-by: Libo Chen <chenlibo@huawei.com>
Signed-off-by: Weilong Chen <chenweilong@huawei.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/kobject_uevent.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 52e5abbc41db..5f72767ddd9b 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -88,11 +88,17 @@ out:
 #ifdef CONFIG_NET
 static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
 {
-	struct kobject *kobj = data;
+	struct kobject *kobj = data, *ksobj;
 	const struct kobj_ns_type_operations *ops;
 
 	ops = kobj_ns_ops(kobj);
-	if (ops) {
+	if (!ops && kobj->kset) {
+		ksobj = &kobj->kset->kobj;
+		if (ksobj->parent != NULL)
+			ops = kobj_ns_ops(ksobj->parent);
+	}
+
+	if (ops && ops->netlink_ns && kobj->ktype->namespace) {
 		const void *sock_ns, *ns;
 		ns = kobj->ktype->namespace(kobj);
 		sock_ns = ops->netlink_ns(dsk);
-- 
cgit v1.2.3


From 809fa972fd90ff27225294b17a027e908b2d7b7a Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Wed, 22 Jan 2014 02:29:41 +0100
Subject: reciprocal_divide: update/correction of the algorithm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Jakub Zawadzki noticed that some divisions by reciprocal_divide()
were not correct [1][2], which he could also show with BPF code
after divisions are transformed into reciprocal_value() for runtime
invariance which can be passed to reciprocal_divide() later on;
reverse in BPF dump ended up with a different, off-by-one K in
some situations.

This has been fixed by Eric Dumazet in commit aee636c4809fa5
("bpf: do not use reciprocal divide"). This follow-up patch
improves reciprocal_value() and reciprocal_divide() to work in
all cases by using Granlund and Montgomery method, so that also
future use is safe and without any non-obvious side-effects.
Known problems with the old implementation were that division by 1
always returned 0 and some off-by-ones when the dividend and divisor
where very large. This seemed to not be problematic with its
current users, as far as we can tell. Eric Dumazet checked for
the slab usage, we cannot surely say so in the case of flex_array.
Still, in order to fix that, we propose an extension from the
original implementation from commit 6a2d7a955d8d resp. [3][4],
by using the algorithm proposed in "Division by Invariant Integers
Using Multiplication" [5], Torbjörn Granlund and Peter L.
Montgomery, that is, pseudocode for q = n/d where q, n, d is in
u32 universe:

1) Initialization:

  int l = ceil(log_2 d)
  uword m' = floor((1<<32)*((1<<l)-d)/d)+1
  int sh_1 = min(l,1)
  int sh_2 = max(l-1,0)

2) For q = n/d, all uword:

  uword t = (n*m')>>32
  q = (t+((n-t)>>sh_1))>>sh_2

The assembler implementation from Agner Fog [6] also helped a lot
while implementing. We have tested the implementation on x86_64,
ppc64, i686, s390x; on x86_64/haswell we're still half the latency
compared to normal divide.

Joint work with Daniel Borkmann.

  [1] http://www.wireshark.org/~darkjames/reciprocal-buggy.c
  [2] http://www.wireshark.org/~darkjames/set-and-dump-filter-k-bug.c
  [3] https://gmplib.org/~tege/division-paper.pdf
  [4] http://homepage.cs.uiowa.edu/~jones/bcd/divide.html
  [5] http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.2556
  [6] http://www.agner.org/optimize/asmlib.zip

Reported-by: Jakub Zawadzki <darkjames-ws@darkjames.pl>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Austin S Hemmelgarn <ahferroin7@gmail.com>
Cc: linux-kernel@vger.kernel.org
Cc: Jesse Gross <jesse@nicira.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Andy Gospodarek <andy@greyhouse.net>
Cc: Veaceslav Falico <vfalico@redhat.com>
Cc: Jay Vosburgh <fubar@us.ibm.com>
Cc: Jakub Zawadzki <darkjames-ws@darkjames.pl>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/flex_array.c     |  7 ++++++-
 lib/reciprocal_div.c | 24 ++++++++++++++++++++----
 2 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/flex_array.c b/lib/flex_array.c
index 6948a6692fc4..2eed22fa507c 100644
--- a/lib/flex_array.c
+++ b/lib/flex_array.c
@@ -90,8 +90,8 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total,
 {
 	struct flex_array *ret;
 	int elems_per_part = 0;
-	int reciprocal_elems = 0;
 	int max_size = 0;
+	struct reciprocal_value reciprocal_elems = { 0 };
 
 	if (element_size) {
 		elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size);
@@ -119,6 +119,11 @@ EXPORT_SYMBOL(flex_array_alloc);
 static int fa_element_to_part_nr(struct flex_array *fa,
 					unsigned int element_nr)
 {
+	/*
+	 * if element_size == 0 we don't get here, so we never touch
+	 * the zeroed fa->reciprocal_elems, which would yield invalid
+	 * results
+	 */
 	return reciprocal_divide(element_nr, fa->reciprocal_elems);
 }
 
diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c
index 75510e94f7d0..464152410c51 100644
--- a/lib/reciprocal_div.c
+++ b/lib/reciprocal_div.c
@@ -1,11 +1,27 @@
+#include <linux/kernel.h>
 #include <asm/div64.h>
 #include <linux/reciprocal_div.h>
 #include <linux/export.h>
 
-u32 reciprocal_value(u32 k)
+/*
+ * For a description of the algorithm please have a look at
+ * include/linux/reciprocal_div.h
+ */
+
+struct reciprocal_value reciprocal_value(u32 d)
 {
-	u64 val = (1LL << 32) + (k - 1);
-	do_div(val, k);
-	return (u32)val;
+	struct reciprocal_value R;
+	u64 m;
+	int l;
+
+	l = fls(d - 1);
+	m = ((1ULL << 32) * ((1ULL << l) - d));
+	do_div(m, d);
+	++m;
+	R.m = (u32)m;
+	R.sh1 = min(l, 1);
+	R.sh2 = max(l - 1, 0);
+
+	return R;
 }
 EXPORT_SYMBOL(reciprocal_value);
-- 
cgit v1.2.3