From 71ae8aac3e198c6f3577cb7ad3a17f6128e97bfa Mon Sep 17 00:00:00 2001 From: Francesco Fusco Date: Thu, 12 Dec 2013 16:09:05 +0100 Subject: lib: introduce arch optimized hash library We introduce a new hashing library that is meant to be used in the contexts where speed is more important than uniformity of the hashed values. The hash library leverages architecture specific implementation to achieve high performance and fall backs to jhash() for the generic case. On Intel-based x86 architectures, the library can exploit the crc32l instruction, part of the Intel SSE4.2 instruction set, if the instruction is supported by the processor. This implementation is twice as fast as the jhash() implementation on an i7 processor. Additional architectures, such as Arm64 provide instructions for accelerating the computation of CRC, so they could be added as well in follow-up work. Signed-off-by: Francesco Fusco Signed-off-by: Daniel Borkmann Signed-off-by: Thomas Graf Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller --- lib/Makefile | 2 +- lib/hash.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 lib/hash.c (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index a459c31e8c6b..d0f79c547d97 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ - percpu-refcount.o percpu_ida.o + percpu-refcount.o percpu_ida.o hash.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o diff --git a/lib/hash.c b/lib/hash.c new file mode 100644 index 000000000000..b89f06a2d606 --- /dev/null +++ b/lib/hash.c @@ -0,0 +1,38 @@ +/* General purpose hashing library + * + * That's a start of a kernel hashing library, which can be extended + * with further algorithms in future. arch_fast_hash{2,}() will + * eventually resolve to an architecture optimized implementation. + * + * Copyright 2013 Francesco Fusco + * Copyright 2013 Daniel Borkmann + * Copyright 2013 Thomas Graf + * Licensed under the GNU General Public License, version 2.0 (GPLv2) + */ + +#include +#include + +static struct fast_hash_ops arch_hash_ops __read_mostly = { + .hash = jhash, + .hash2 = jhash2, +}; + +u32 arch_fast_hash(const void *data, u32 len, u32 seed) +{ + return arch_hash_ops.hash(data, len, seed); +} +EXPORT_SYMBOL_GPL(arch_fast_hash); + +u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed) +{ + return arch_hash_ops.hash2(data, len, seed); +} +EXPORT_SYMBOL_GPL(arch_fast_hash2); + +static int __init hashlib_init(void) +{ + setup_arch_fast_hash(&arch_hash_ops); + return 0; +} +early_initcall(hashlib_init); -- cgit v1.2.3 From 237217546d44fe06c16b8895eecaef22f18ba5ee Mon Sep 17 00:00:00 2001 From: Francesco Fusco Date: Wed, 18 Dec 2013 16:05:48 +0100 Subject: lib: hash: follow-up fixups for arch hash This patch adds the include file to pull in __read_mostly on some architectures e.g. ppc and also fixes up signatures in generic asm. Reported-by: Fengguang Wu Signed-off-by: Francesco Fusco Signed-off-by: David S. Miller --- lib/hash.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/hash.c b/lib/hash.c index b89f06a2d606..fea973f4bd57 100644 --- a/lib/hash.c +++ b/lib/hash.c @@ -12,6 +12,7 @@ #include #include +#include static struct fast_hash_ops arch_hash_ops __read_mostly = { .hash = jhash, -- cgit v1.2.3 From 03144b5869d2921dafbea477246aeb5d4eb5fc38 Mon Sep 17 00:00:00 2001 From: Michael Dalton Date: Thu, 16 Jan 2014 22:23:29 -0800 Subject: lib: Ensure EWMA does not store wrong intermediate values To ensure ewma_read() without a lock returns a valid but possibly out of date average, modify ewma_add() by using ACCESS_ONCE to prevent intermediate wrong values from being written to avg->internal. Suggested-by: Eric Dumazet Acked-by: Michael S. Tsirkin Acked-by: Eric Dumazet Signed-off-by: Michael Dalton Signed-off-by: David S. Miller --- lib/average.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/average.c b/lib/average.c index 99a67e662b3c..114d1beae0c7 100644 --- a/lib/average.c +++ b/lib/average.c @@ -53,8 +53,10 @@ EXPORT_SYMBOL(ewma_init); */ struct ewma *ewma_add(struct ewma *avg, unsigned long val) { - avg->internal = avg->internal ? - (((avg->internal << avg->weight) - avg->internal) + + unsigned long internal = ACCESS_ONCE(avg->internal); + + ACCESS_ONCE(avg->internal) = internal ? + (((internal << avg->weight) - internal) + (val << avg->factor)) >> avg->weight : (val << avg->factor); return avg; -- cgit v1.2.3 From 82ef3d5d5f3ffd757c960693c4fe7a0051211849 Mon Sep 17 00:00:00 2001 From: Weilong Chen Date: Thu, 16 Jan 2014 17:24:31 +0800 Subject: net: fix "queues" uevent between network namespaces When I create a new namespace with 'ip netns add net0', or add/remove new links in a namespace with 'ip link add/delete type veth', rx/tx queues events can be got in all namespaces. That is because rx/tx queue ktypes do not have namespace support, and their kobj parents are setted to NULL. This patch is to fix it. Reported-by: Libo Chen Signed-off-by: Libo Chen Signed-off-by: Weilong Chen Acked-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- lib/kobject_uevent.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 52e5abbc41db..5f72767ddd9b 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -88,11 +88,17 @@ out: #ifdef CONFIG_NET static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data) { - struct kobject *kobj = data; + struct kobject *kobj = data, *ksobj; const struct kobj_ns_type_operations *ops; ops = kobj_ns_ops(kobj); - if (ops) { + if (!ops && kobj->kset) { + ksobj = &kobj->kset->kobj; + if (ksobj->parent != NULL) + ops = kobj_ns_ops(ksobj->parent); + } + + if (ops && ops->netlink_ns && kobj->ktype->namespace) { const void *sock_ns, *ns; ns = kobj->ktype->namespace(kobj); sock_ns = ops->netlink_ns(dsk); -- cgit v1.2.3 From 809fa972fd90ff27225294b17a027e908b2d7b7a Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 22 Jan 2014 02:29:41 +0100 Subject: reciprocal_divide: update/correction of the algorithm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jakub Zawadzki noticed that some divisions by reciprocal_divide() were not correct [1][2], which he could also show with BPF code after divisions are transformed into reciprocal_value() for runtime invariance which can be passed to reciprocal_divide() later on; reverse in BPF dump ended up with a different, off-by-one K in some situations. This has been fixed by Eric Dumazet in commit aee636c4809fa5 ("bpf: do not use reciprocal divide"). This follow-up patch improves reciprocal_value() and reciprocal_divide() to work in all cases by using Granlund and Montgomery method, so that also future use is safe and without any non-obvious side-effects. Known problems with the old implementation were that division by 1 always returned 0 and some off-by-ones when the dividend and divisor where very large. This seemed to not be problematic with its current users, as far as we can tell. Eric Dumazet checked for the slab usage, we cannot surely say so in the case of flex_array. Still, in order to fix that, we propose an extension from the original implementation from commit 6a2d7a955d8d resp. [3][4], by using the algorithm proposed in "Division by Invariant Integers Using Multiplication" [5], Torbjörn Granlund and Peter L. Montgomery, that is, pseudocode for q = n/d where q, n, d is in u32 universe: 1) Initialization: int l = ceil(log_2 d) uword m' = floor((1<<32)*((1<>32 q = (t+((n-t)>>sh_1))>>sh_2 The assembler implementation from Agner Fog [6] also helped a lot while implementing. We have tested the implementation on x86_64, ppc64, i686, s390x; on x86_64/haswell we're still half the latency compared to normal divide. Joint work with Daniel Borkmann. [1] http://www.wireshark.org/~darkjames/reciprocal-buggy.c [2] http://www.wireshark.org/~darkjames/set-and-dump-filter-k-bug.c [3] https://gmplib.org/~tege/division-paper.pdf [4] http://homepage.cs.uiowa.edu/~jones/bcd/divide.html [5] http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.2556 [6] http://www.agner.org/optimize/asmlib.zip Reported-by: Jakub Zawadzki Cc: Eric Dumazet Cc: Austin S Hemmelgarn Cc: linux-kernel@vger.kernel.org Cc: Jesse Gross Cc: Jamal Hadi Salim Cc: Stephen Hemminger Cc: Matt Mackall Cc: Pekka Enberg Cc: Christoph Lameter Cc: Andy Gospodarek Cc: Veaceslav Falico Cc: Jay Vosburgh Cc: Jakub Zawadzki Signed-off-by: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- lib/flex_array.c | 7 ++++++- lib/reciprocal_div.c | 24 ++++++++++++++++++++---- 2 files changed, 26 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/flex_array.c b/lib/flex_array.c index 6948a6692fc4..2eed22fa507c 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -90,8 +90,8 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total, { struct flex_array *ret; int elems_per_part = 0; - int reciprocal_elems = 0; int max_size = 0; + struct reciprocal_value reciprocal_elems = { 0 }; if (element_size) { elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size); @@ -119,6 +119,11 @@ EXPORT_SYMBOL(flex_array_alloc); static int fa_element_to_part_nr(struct flex_array *fa, unsigned int element_nr) { + /* + * if element_size == 0 we don't get here, so we never touch + * the zeroed fa->reciprocal_elems, which would yield invalid + * results + */ return reciprocal_divide(element_nr, fa->reciprocal_elems); } diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c index 75510e94f7d0..464152410c51 100644 --- a/lib/reciprocal_div.c +++ b/lib/reciprocal_div.c @@ -1,11 +1,27 @@ +#include #include #include #include -u32 reciprocal_value(u32 k) +/* + * For a description of the algorithm please have a look at + * include/linux/reciprocal_div.h + */ + +struct reciprocal_value reciprocal_value(u32 d) { - u64 val = (1LL << 32) + (k - 1); - do_div(val, k); - return (u32)val; + struct reciprocal_value R; + u64 m; + int l; + + l = fls(d - 1); + m = ((1ULL << 32) * ((1ULL << l) - d)); + do_div(m, d); + ++m; + R.m = (u32)m; + R.sh1 = min(l, 1); + R.sh2 = max(l - 1, 0); + + return R; } EXPORT_SYMBOL(reciprocal_value); -- cgit v1.2.3