From c84a3b27798dfce928b867fa1c9f3c3fd66f0a31 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 23 Nov 2013 18:01:46 -0500 Subject: sysfs: drop kobj_ns_type handling, take #2 The way namespace tags are implemented in sysfs is more complicated than necessary. As each tag is a pointer value and required to be non-NULL under a namespace enabled parent, there's no need to record separately what type each tag is. If multiple namespace types are needed, which currently aren't, we can simply compare the tag to a set of allowed tags in the superblock assuming that the tags, being pointers, won't have the same value across multiple types. This patch rips out kobj_ns_type handling from sysfs. sysfs now has an enable switch to turn on namespace under a node. If enabled, all children are required to have non-NULL namespace tags and filtered against the super_block's tag. kobject namespace determination is now performed in lib/kobject.c::create_dir() making sysfs_read_ns_type() unnecessary. The sanity checks are also moved. create_dir() is restructured to ease such addition. This removes most kobject namespace knowledge from sysfs proper which will enable proper separation and layering of sysfs. This is the second try. The first one was cb26a311578e ("sysfs: drop kobj_ns_type handling") which tried to automatically enable namespace if there are children with non-NULL namespace tags; however, it was broken for symlinks as they should inherit the target's tag iff namespace is enabled in the parent. This led to namespace filtering enabled incorrectly for wireless net class devices through phy80211 symlinks and thus network configuration failure. a1212d278c05 ("Revert "sysfs: drop kobj_ns_type handling"") reverted the commit. This shouldn't introduce any behavior changes, for real. v2: Dummy implementation of sysfs_enable_ns() for !CONFIG_SYSFS was missing and caused build failure. Reported by kbuild test robot. Signed-off-by: Tejun Heo Reported-by: Linus Torvalds Cc: Eric W. Biederman Cc: Kay Sievers Cc: Greg Kroah-Hartman Cc: kbuild test robot Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 5b4b8886435e..16e9335b32d3 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -65,13 +65,17 @@ static int populate_dir(struct kobject *kobj) static int create_dir(struct kobject *kobj) { + const struct kobj_ns_type_operations *ops; int error; error = sysfs_create_dir_ns(kobj, kobject_namespace(kobj)); - if (!error) { - error = populate_dir(kobj); - if (error) - sysfs_remove_dir(kobj); + if (error) + return error; + + error = populate_dir(kobj); + if (error) { + sysfs_remove_dir(kobj); + return error; } /* @@ -80,7 +84,20 @@ static int create_dir(struct kobject *kobj) */ sysfs_get(kobj->sd); - return error; + /* + * If @kobj has ns_ops, its children need to be filtered based on + * their namespace tags. Enable namespace support on @kobj->sd. + */ + ops = kobj_child_ns_ops(kobj); + if (ops) { + BUG_ON(ops->type <= KOBJ_NS_TYPE_NONE); + BUG_ON(ops->type >= KOBJ_NS_TYPES); + BUG_ON(!kobj_ns_type_registered(ops->type)); + + sysfs_enable_ns(kobj->sd); + } + + return 0; } static int get_kobj_path_length(struct kobject *kobj) -- cgit v1.2.3 From 93b2b8e4aa4317e3fe6414d117deb5f3c362e8bb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 28 Nov 2013 14:54:15 -0500 Subject: sysfs, kernfs: introduce kernfs_create_dir[_ns]() Introduce kernfs interface to manipulate a directory which takes and returns sysfs_dirents. create_dir() is renamed to kernfs_create_dir_ns() and its argumantes and return value are updated. create_dir() usages are replaced with kernfs_create_dir_ns() and sysfs_create_subdir() usages are replaced with kernfs_create_dir(). Dup warnings are handled explicitly by sysfs users of the kernfs interface. sysfs_enable_ns() is renamed to kernfs_enable_ns(). This patch doesn't introduce any behavior changes. v2: Dummy implementation for !CONFIG_SYSFS updated to return -ENOSYS. v3: kernfs_enable_ns() added. v4: Refreshed on top of "sysfs: drop kobj_ns_type handling, take #2" so that this patch removes sysfs_enable_ns(). Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 16e9335b32d3..b8d848fb1377 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -94,7 +94,7 @@ static int create_dir(struct kobject *kobj) BUG_ON(ops->type >= KOBJ_NS_TYPES); BUG_ON(!kobj_ns_type_registered(ops->type)); - sysfs_enable_ns(kobj->sd); + kernfs_enable_ns(kobj->sd); } return 0; -- cgit v1.2.3 From 89c86a64cd056e283323710c9ddf6f7090a450c8 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 5 Dec 2013 17:37:51 -0700 Subject: kobject: delay kobject release for random time When CONFIG_DEBUG_KOBJECT_RELEASE=y, delay kobject release functions for a random time between 1 and 8 seconds, which effectively changes the order in which they're called. Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index b8d848fb1377..1d110dc95db5 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -18,6 +18,7 @@ #include #include #include +#include /** * kobject_namespace - return @kobj's namespace tag @@ -642,10 +643,12 @@ static void kobject_release(struct kref *kref) { struct kobject *kobj = container_of(kref, struct kobject, kref); #ifdef CONFIG_DEBUG_KOBJECT_RELEASE - pr_info("kobject: '%s' (%p): %s, parent %p (delayed)\n", - kobject_name(kobj), kobj, __func__, kobj->parent); + unsigned long delay = HZ + HZ * (get_random_int() & 0x3); + pr_info("kobject: '%s' (%p): %s, parent %p (delayed %ld)\n", + kobject_name(kobj), kobj, __func__, kobj->parent, delay); INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup); - schedule_delayed_work(&kobj->release, HZ); + + schedule_delayed_work(&kobj->release, delay); #else kobject_cleanup(kobj); #endif -- cgit v1.2.3 From 35a5fe695b07ae899510ad76fdf0aeaef85fe951 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 5 Dec 2013 17:38:00 -0700 Subject: kobject: remove kset from sysfs immediately in kset_unregister() There's no "unlink from sysfs" interface for ksets, so I think callers of kset_unregister() expect the kset to be removed from sysfs immediately, without waiting for the last reference to be released. This patch makes the sysfs removal happen immediately, so the caller may create a new kset with the same name as soon as kset_unregister() returns. Without this, every caller has to call "kobject_del(&kset->kobj)" first unless it knows it will never create a new kset with the same name. This sometimes shows up on module unload and reload, where the reload fails because it tries to create a kobject with the same name as one from the original load that still exists. CONFIG_DEBUG_KOBJECT_RELEASE=y makes this problem easier to hit. Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 1d110dc95db5..98b45bb33c8d 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -855,6 +855,7 @@ void kset_unregister(struct kset *k) { if (!k) return; + kobject_del(&k->kobj); kobject_put(&k->kobj); } -- cgit v1.2.3 From 0d6077f8b48ed2dce8f2466a76c0d574a3b4dbe9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 26 Nov 2013 12:43:37 +0800 Subject: lib/scatterlist: export sg_miter_skip() sg_copy_buffer() can't meet demand for some drrivers(such usb mass storage), so we have to use the sg_miter_* APIs to access sg buffer, then need export sg_miter_skip() for these drivers. The API is needed for converting to sg_miter_* APIs in USB storage driver for accessing sg buffer. Acked-by: Andrew Morton Cc: FUJITA Tomonori Cc: Jens Axboe Signed-off-by: Ming Lei Reviewed-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- lib/scatterlist.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/scatterlist.c b/lib/scatterlist.c index d16fa295ae1d..3a8e8e8fb2a5 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -495,7 +495,7 @@ static bool sg_miter_get_next_page(struct sg_mapping_iter *miter) * true if @miter contains the valid mapping. false if end of sg * list is reached. */ -static bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset) +bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset) { sg_miter_stop(miter); @@ -513,6 +513,7 @@ static bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset) return true; } +EXPORT_SYMBOL(sg_miter_skip); /** * sg_miter_next - proceed mapping iterator to the next mapping -- cgit v1.2.3 From 020d30f17f196dcbf0c2c68a874345e8885a3149 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 8 Nov 2013 15:28:25 +0100 Subject: kobject: fix memory leak in kobject_set_name_vargs If the call to kvasprintf fails then the old name of the object will be leaked, this patch fixes the bug by restoring the old name before returning ENOMEM. Signed-off-by: Maurizio Lombardi Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 98b45bb33c8d..94b321f4ac67 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -265,8 +265,10 @@ int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, return 0; kobj->name = kvasprintf(GFP_KERNEL, fmt, vargs); - if (!kobj->name) + if (!kobj->name) { + kobj->name = old_name; return -ENOMEM; + } /* ewww... some of these buggers have '/' in the name ... */ while ((s = strchr(kobj->name, '/'))) -- cgit v1.2.3 From 324a56e16e44baecac3ca799fd216154145c14bf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 11 Dec 2013 14:11:53 -0500 Subject: kernfs: s/sysfs_dirent/kernfs_node/ and rename its friends accordingly kernfs has just been separated out from sysfs and we're already in full conflict mode. Nothing can make the situation any worse. Let's take the chance to name things properly. This patch performs the following renames. * s/sysfs_elem_dir/kernfs_elem_dir/ * s/sysfs_elem_symlink/kernfs_elem_symlink/ * s/sysfs_elem_attr/kernfs_elem_file/ * s/sysfs_dirent/kernfs_node/ * s/sd/kn/ in kernfs proper * s/parent_sd/parent/ * s/target_sd/target/ * s/dir_sd/parent/ * s/to_sysfs_dirent()/rb_to_kn()/ * misc renames of local vars when they conflict with the above Because md, mic and gpio dig into sysfs details, this patch ends up modifying them. All are sysfs_dirent renames and trivial. While we can avoid these by introducing a dummy wrapping struct sysfs_dirent around kernfs_node, given the limited usage outside kernfs and sysfs proper, I don't think such workaround is called for. This patch is strictly rename only and doesn't introduce any functional difference. - mic / gpio renames were missing. Spotted by kbuild test robot. Signed-off-by: Tejun Heo Cc: Neil Brown Cc: Linus Walleij Cc: Ashutosh Dixit Cc: kbuild test robot Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 94b321f4ac67..064451f2a6c3 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -556,7 +556,7 @@ out: */ void kobject_del(struct kobject *kobj) { - struct sysfs_dirent *sd; + struct kernfs_node *sd; if (!kobj) return; -- cgit v1.2.3 From 71ae8aac3e198c6f3577cb7ad3a17f6128e97bfa Mon Sep 17 00:00:00 2001 From: Francesco Fusco Date: Thu, 12 Dec 2013 16:09:05 +0100 Subject: lib: introduce arch optimized hash library We introduce a new hashing library that is meant to be used in the contexts where speed is more important than uniformity of the hashed values. The hash library leverages architecture specific implementation to achieve high performance and fall backs to jhash() for the generic case. On Intel-based x86 architectures, the library can exploit the crc32l instruction, part of the Intel SSE4.2 instruction set, if the instruction is supported by the processor. This implementation is twice as fast as the jhash() implementation on an i7 processor. Additional architectures, such as Arm64 provide instructions for accelerating the computation of CRC, so they could be added as well in follow-up work. Signed-off-by: Francesco Fusco Signed-off-by: Daniel Borkmann Signed-off-by: Thomas Graf Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller --- lib/Makefile | 2 +- lib/hash.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 lib/hash.c (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index a459c31e8c6b..d0f79c547d97 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ - percpu-refcount.o percpu_ida.o + percpu-refcount.o percpu_ida.o hash.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o diff --git a/lib/hash.c b/lib/hash.c new file mode 100644 index 000000000000..b89f06a2d606 --- /dev/null +++ b/lib/hash.c @@ -0,0 +1,38 @@ +/* General purpose hashing library + * + * That's a start of a kernel hashing library, which can be extended + * with further algorithms in future. arch_fast_hash{2,}() will + * eventually resolve to an architecture optimized implementation. + * + * Copyright 2013 Francesco Fusco + * Copyright 2013 Daniel Borkmann + * Copyright 2013 Thomas Graf + * Licensed under the GNU General Public License, version 2.0 (GPLv2) + */ + +#include +#include + +static struct fast_hash_ops arch_hash_ops __read_mostly = { + .hash = jhash, + .hash2 = jhash2, +}; + +u32 arch_fast_hash(const void *data, u32 len, u32 seed) +{ + return arch_hash_ops.hash(data, len, seed); +} +EXPORT_SYMBOL_GPL(arch_fast_hash); + +u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed) +{ + return arch_hash_ops.hash2(data, len, seed); +} +EXPORT_SYMBOL_GPL(arch_fast_hash2); + +static int __init hashlib_init(void) +{ + setup_arch_fast_hash(&arch_hash_ops); + return 0; +} +early_initcall(hashlib_init); -- cgit v1.2.3 From 237217546d44fe06c16b8895eecaef22f18ba5ee Mon Sep 17 00:00:00 2001 From: Francesco Fusco Date: Wed, 18 Dec 2013 16:05:48 +0100 Subject: lib: hash: follow-up fixups for arch hash This patch adds the include file to pull in __read_mostly on some architectures e.g. ppc and also fixes up signatures in generic asm. Reported-by: Fengguang Wu Signed-off-by: Francesco Fusco Signed-off-by: David S. Miller --- lib/hash.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/hash.c b/lib/hash.c index b89f06a2d606..fea973f4bd57 100644 --- a/lib/hash.c +++ b/lib/hash.c @@ -12,6 +12,7 @@ #include #include +#include static struct fast_hash_ops arch_hash_ops __read_mostly = { .hash = jhash, -- cgit v1.2.3 From eb4c69033fd1dda8b60e48af3accdd578c2e59ed Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 4 Jan 2014 19:56:40 -0800 Subject: Revert "kobject: introduce kobj_completion" This reverts commit eee031649707db3c9920d9498f8d03819b74fc23. Jeff writes: I have no objections to reverting it. There were concerns from Al Viro that it'd be tough to get right by callers and I had assumed it got dropped after that. I had planned on using it in my btrfs sysfs exports patchset but came up with a better way. Cc: Jeff Mahoney Cc: Al Viro Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 50 -------------------------------------------------- 1 file changed, 50 deletions(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 064451f2a6c3..f7f69cbe4f6e 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -13,7 +13,6 @@ */ #include -#include #include #include #include @@ -781,55 +780,6 @@ const struct sysfs_ops kobj_sysfs_ops = { .store = kobj_attr_store, }; -/** - * kobj_completion_init - initialize a kobj_completion object. - * @kc: kobj_completion - * @ktype: type of kobject to initialize - * - * kobj_completion structures can be embedded within structures with different - * lifetime rules. During the release of the enclosing object, we can - * wait on the release of the kobject so that we don't free it while it's - * still busy. - */ -void kobj_completion_init(struct kobj_completion *kc, struct kobj_type *ktype) -{ - init_completion(&kc->kc_unregister); - kobject_init(&kc->kc_kobj, ktype); -} -EXPORT_SYMBOL_GPL(kobj_completion_init); - -/** - * kobj_completion_release - release a kobj_completion object - * @kobj: kobject embedded in kobj_completion - * - * Used with kobject_release to notify waiters that the kobject has been - * released. - */ -void kobj_completion_release(struct kobject *kobj) -{ - struct kobj_completion *kc = kobj_to_kobj_completion(kobj); - complete(&kc->kc_unregister); -} -EXPORT_SYMBOL_GPL(kobj_completion_release); - -/** - * kobj_completion_del_and_wait - release the kobject and wait for it - * @kc: kobj_completion object to release - * - * Delete the kobject from sysfs and drop the reference count. Then wait - * until any other outstanding references are also dropped. This routine - * is only necessary once other references may have been taken on the - * kobject. Typically this happens when the kobject has been published - * to sysfs via kobject_add. - */ -void kobj_completion_del_and_wait(struct kobj_completion *kc) -{ - kobject_del(&kc->kc_kobj); - kobject_put(&kc->kc_kobj); - wait_for_completion(&kc->kc_unregister); -} -EXPORT_SYMBOL_GPL(kobj_completion_del_and_wait); - /** * kset_register - initialize and add a kset. * @k: kset. -- cgit v1.2.3 From 0cb637bff80d5ba2b916bb19f19ffd59cd4079fd Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 16 Dec 2013 14:05:01 -0500 Subject: swiotlb: Don't DoS us with 'swiotlb buffer is full' (v2) There is no need for that so lets use ratelimiting. Also add some extra information to be helpful. Acked-by: Stefano Stabellini [v2: s/ld/zs on the printk] Signed-off-by: Konrad Rzeszutek Wilk --- lib/swiotlb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index e4399fa65ad6..4634ac9cdb38 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -505,7 +505,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, not_found: spin_unlock_irqrestore(&io_tlb_lock, flags); - dev_warn(hwdev, "swiotlb buffer is full\n"); + if (printk_ratelimit()) + dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size); return SWIOTLB_MAP_ERROR; found: spin_unlock_irqrestore(&io_tlb_lock, flags); -- cgit v1.2.3 From 9705710e40b9e5acaf003f90d6ed883ba193b314 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 4 Jan 2014 14:20:04 +0100 Subject: kobject: Fix source code comment spelling Signed-off-by: Bart Van Assche Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index f7f69cbe4f6e..b0b26665c611 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -365,7 +365,7 @@ static int kobject_add_varg(struct kobject *kobj, struct kobject *parent, * * If @parent is set, then the parent of the @kobj will be set to it. * If @parent is NULL, then the parent of the @kobj will be set to the - * kobject associted with the kset assigned to this kobject. If no kset + * kobject associated with the kset assigned to this kobject. If no kset * is assigned to the kobject, then the kobject will be located in the * root of the sysfs tree. * -- cgit v1.2.3 From 8bc588e0e585bc9085df75e84d4d5635f45cf360 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Sun, 22 Dec 2013 11:34:22 +0100 Subject: firewire: ohci: Turn remote DMA support into a module parameter This makes it possible to debug kernel over FireWire without the need to recompile it. [Stefan R: changed description from "...0" to "...N"] Cc: Dave Hansen Signed-off-by: Lubomir Rintel Signed-off-by: Stefan Richter --- lib/Kconfig.debug | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index db25707aa41b..dc30284a3b07 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1547,17 +1547,6 @@ config PROVIDE_OHCI1394_DMA_INIT See Documentation/debugging-via-ohci1394.txt for more information. -config FIREWIRE_OHCI_REMOTE_DMA - bool "Remote debugging over FireWire with firewire-ohci" - depends on FIREWIRE_OHCI - help - This option lets you use the FireWire bus for remote debugging - with help of the firewire-ohci driver. It enables unfiltered - remote DMA in firewire-ohci. - See Documentation/debugging-via-ohci1394.txt for more information. - - If unsure, say N. - config BUILD_DOCSRC bool "Build targets in Documentation/ tree" depends on HEADERS_CHECK -- cgit v1.2.3 From 03144b5869d2921dafbea477246aeb5d4eb5fc38 Mon Sep 17 00:00:00 2001 From: Michael Dalton Date: Thu, 16 Jan 2014 22:23:29 -0800 Subject: lib: Ensure EWMA does not store wrong intermediate values To ensure ewma_read() without a lock returns a valid but possibly out of date average, modify ewma_add() by using ACCESS_ONCE to prevent intermediate wrong values from being written to avg->internal. Suggested-by: Eric Dumazet Acked-by: Michael S. Tsirkin Acked-by: Eric Dumazet Signed-off-by: Michael Dalton Signed-off-by: David S. Miller --- lib/average.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/average.c b/lib/average.c index 99a67e662b3c..114d1beae0c7 100644 --- a/lib/average.c +++ b/lib/average.c @@ -53,8 +53,10 @@ EXPORT_SYMBOL(ewma_init); */ struct ewma *ewma_add(struct ewma *avg, unsigned long val) { - avg->internal = avg->internal ? - (((avg->internal << avg->weight) - avg->internal) + + unsigned long internal = ACCESS_ONCE(avg->internal); + + ACCESS_ONCE(avg->internal) = internal ? + (((internal << avg->weight) - internal) + (val << avg->factor)) >> avg->weight : (val << avg->factor); return avg; -- cgit v1.2.3 From 82ef3d5d5f3ffd757c960693c4fe7a0051211849 Mon Sep 17 00:00:00 2001 From: Weilong Chen Date: Thu, 16 Jan 2014 17:24:31 +0800 Subject: net: fix "queues" uevent between network namespaces When I create a new namespace with 'ip netns add net0', or add/remove new links in a namespace with 'ip link add/delete type veth', rx/tx queues events can be got in all namespaces. That is because rx/tx queue ktypes do not have namespace support, and their kobj parents are setted to NULL. This patch is to fix it. Reported-by: Libo Chen Signed-off-by: Libo Chen Signed-off-by: Weilong Chen Acked-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- lib/kobject_uevent.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 52e5abbc41db..5f72767ddd9b 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -88,11 +88,17 @@ out: #ifdef CONFIG_NET static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data) { - struct kobject *kobj = data; + struct kobject *kobj = data, *ksobj; const struct kobj_ns_type_operations *ops; ops = kobj_ns_ops(kobj); - if (ops) { + if (!ops && kobj->kset) { + ksobj = &kobj->kset->kobj; + if (ksobj->parent != NULL) + ops = kobj_ns_ops(ksobj->parent); + } + + if (ops && ops->netlink_ns && kobj->ktype->namespace) { const void *sock_ns, *ns; ns = kobj->ktype->namespace(kobj); sock_ns = ops->netlink_ns(dsk); -- cgit v1.2.3 From 687b0ad2751ca8ea418396fa780e22571fba76a8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 6 Jan 2014 13:13:26 -0800 Subject: percpu-refcount: Add a WARN() for ref going negative AIO had a missing get, which led to an ioctx leak - after percpu_ref_kill() the ref was 0 so percpu_ref_put() never saw it hit 0. This wasn't noticed at the time because it all happened completely silently, this adds a WARN() which would've caught the aio bug. tj: Used WARN_ONCE() instead of WARN(). Signed-off-by: Kent Overstreet Signed-off-by: Tejun Heo --- lib/percpu-refcount.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 1a53d497a8c5..963b7034a51b 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -120,6 +120,9 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) atomic_add((int) count - PCPU_COUNT_BIAS, &ref->count); + WARN_ONCE(atomic_read(&ref->count) <= 0, "percpu ref <= 0 (%i)", + atomic_read(&ref->count)); + /* @ref is viewed as dead on all CPUs, send out kill confirmation */ if (ref->confirm_kill) ref->confirm_kill(ref); -- cgit v1.2.3 From 0abdd7a81b7e3fd781d7fabcca49501852bba17e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 21 Jan 2014 15:48:12 -0800 Subject: dma-debug: introduce debug_dma_assert_idle() Record actively mapped pages and provide an api for asserting a given page is dma inactive before execution proceeds. Placing debug_dma_assert_idle() in cow_user_page() flagged the violation of the dma-api in the NET_DMA implementation (see commit 77873803363c "net_dma: mark broken"). The implementation includes the capability to count, in a limited way, repeat mappings of the same page that occur without an intervening unmap. This 'overlap' counter is limited to the few bits of tag space in a radix tree. This mechanism is added to mitigate false negative cases where, for example, a page is dma mapped twice and debug_dma_assert_idle() is called after the page is un-mapped once. Signed-off-by: Dan Williams Cc: Joerg Roedel Cc: Vinod Koul Cc: Russell King Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 12 +++- lib/dma-debug.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 190 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6982094a7e74..900b63c1e899 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1584,8 +1584,16 @@ config DMA_API_DEBUG With this option you will be able to detect common bugs in device drivers like double-freeing of DMA mappings or freeing mappings that were never allocated. - This option causes a performance degredation. Use only if you want - to debug device drivers. If unsure, say N. + + This also attempts to catch cases where a page owned by DMA is + accessed by the cpu in a way that could cause data corruption. For + example, this enables cow_user_page() to check that the source page is + not undergoing DMA. + + This option causes a performance degradation. Use only if you want to + debug device drivers and dma interactions. + + If unsure, say N. source "samples/Kconfig" diff --git a/lib/dma-debug.c b/lib/dma-debug.c index d87a17a819d0..c38083871f11 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -53,11 +53,26 @@ enum map_err_types { #define DMA_DEBUG_STACKTRACE_ENTRIES 5 +/** + * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping + * @list: node on pre-allocated free_entries list + * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent + * @type: single, page, sg, coherent + * @pfn: page frame of the start address + * @offset: offset of mapping relative to pfn + * @size: length of the mapping + * @direction: enum dma_data_direction + * @sg_call_ents: 'nents' from dma_map_sg + * @sg_mapped_ents: 'mapped_ents' from dma_map_sg + * @map_err_type: track whether dma_mapping_error() was checked + * @stacktrace: support backtraces when a violation is detected + */ struct dma_debug_entry { struct list_head list; struct device *dev; int type; - phys_addr_t paddr; + unsigned long pfn; + size_t offset; u64 dev_addr; u64 size; int direction; @@ -372,6 +387,11 @@ static void hash_bucket_del(struct dma_debug_entry *entry) list_del(&entry->list); } +static unsigned long long phys_addr(struct dma_debug_entry *entry) +{ + return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset; +} + /* * Dump mapping entries for debugging purposes */ @@ -389,9 +409,9 @@ void debug_dma_dump_mappings(struct device *dev) list_for_each_entry(entry, &bucket->list, list) { if (!dev || dev == entry->dev) { dev_info(entry->dev, - "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n", + "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n", type2name[entry->type], idx, - (unsigned long long)entry->paddr, + phys_addr(entry), entry->pfn, entry->dev_addr, entry->size, dir2name[entry->direction], maperr2str[entry->map_err_type]); @@ -403,6 +423,133 @@ void debug_dma_dump_mappings(struct device *dev) } EXPORT_SYMBOL(debug_dma_dump_mappings); +/* + * For each page mapped (initial page in the case of + * dma_alloc_coherent/dma_map_{single|page}, or each page in a + * scatterlist) insert into this tree using the pfn as the key. At + * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If + * the pfn already exists at insertion time add a tag as a reference + * count for the overlapping mappings. For now, the overlap tracking + * just ensures that 'unmaps' balance 'maps' before marking the pfn + * idle, but we should also be flagging overlaps as an API violation. + * + * Memory usage is mostly constrained by the maximum number of available + * dma-debug entries in that we need a free dma_debug_entry before + * inserting into the tree. In the case of dma_map_{single|page} and + * dma_alloc_coherent there is only one dma_debug_entry and one pfn to + * track per event. dma_map_sg(), on the other hand, + * consumes a single dma_debug_entry, but inserts 'nents' entries into + * the tree. + * + * At any time debug_dma_assert_idle() can be called to trigger a + * warning if the given page is in the active set. + */ +static RADIX_TREE(dma_active_pfn, GFP_NOWAIT); +static DEFINE_SPINLOCK(radix_lock); +#define ACTIVE_PFN_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) + +static int active_pfn_read_overlap(unsigned long pfn) +{ + int overlap = 0, i; + + for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) + if (radix_tree_tag_get(&dma_active_pfn, pfn, i)) + overlap |= 1 << i; + return overlap; +} + +static int active_pfn_set_overlap(unsigned long pfn, int overlap) +{ + int i; + + if (overlap > ACTIVE_PFN_MAX_OVERLAP || overlap < 0) + return 0; + + for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) + if (overlap & 1 << i) + radix_tree_tag_set(&dma_active_pfn, pfn, i); + else + radix_tree_tag_clear(&dma_active_pfn, pfn, i); + + return overlap; +} + +static void active_pfn_inc_overlap(unsigned long pfn) +{ + int overlap = active_pfn_read_overlap(pfn); + + overlap = active_pfn_set_overlap(pfn, ++overlap); + + /* If we overflowed the overlap counter then we're potentially + * leaking dma-mappings. Otherwise, if maps and unmaps are + * balanced then this overflow may cause false negatives in + * debug_dma_assert_idle() as the pfn may be marked idle + * prematurely. + */ + WARN_ONCE(overlap == 0, + "DMA-API: exceeded %d overlapping mappings of pfn %lx\n", + ACTIVE_PFN_MAX_OVERLAP, pfn); +} + +static int active_pfn_dec_overlap(unsigned long pfn) +{ + int overlap = active_pfn_read_overlap(pfn); + + return active_pfn_set_overlap(pfn, --overlap); +} + +static int active_pfn_insert(struct dma_debug_entry *entry) +{ + unsigned long flags; + int rc; + + spin_lock_irqsave(&radix_lock, flags); + rc = radix_tree_insert(&dma_active_pfn, entry->pfn, entry); + if (rc == -EEXIST) + active_pfn_inc_overlap(entry->pfn); + spin_unlock_irqrestore(&radix_lock, flags); + + return rc; +} + +static void active_pfn_remove(struct dma_debug_entry *entry) +{ + unsigned long flags; + + spin_lock_irqsave(&radix_lock, flags); + if (active_pfn_dec_overlap(entry->pfn) == 0) + radix_tree_delete(&dma_active_pfn, entry->pfn); + spin_unlock_irqrestore(&radix_lock, flags); +} + +/** + * debug_dma_assert_idle() - assert that a page is not undergoing dma + * @page: page to lookup in the dma_active_pfn tree + * + * Place a call to this routine in cases where the cpu touching the page + * before the dma completes (page is dma_unmapped) will lead to data + * corruption. + */ +void debug_dma_assert_idle(struct page *page) +{ + unsigned long flags; + struct dma_debug_entry *entry; + + if (!page) + return; + + spin_lock_irqsave(&radix_lock, flags); + entry = radix_tree_lookup(&dma_active_pfn, page_to_pfn(page)); + spin_unlock_irqrestore(&radix_lock, flags); + + if (!entry) + return; + + err_printk(entry->dev, entry, + "DMA-API: cpu touching an active dma mapped page " + "[pfn=0x%lx]\n", entry->pfn); +} + /* * Wrapper function for adding an entry to the hash. * This function takes care of locking itself. @@ -411,10 +558,21 @@ static void add_dma_entry(struct dma_debug_entry *entry) { struct hash_bucket *bucket; unsigned long flags; + int rc; bucket = get_hash_bucket(entry, &flags); hash_bucket_add(bucket, entry); put_hash_bucket(bucket, &flags); + + rc = active_pfn_insert(entry); + if (rc == -ENOMEM) { + pr_err("DMA-API: pfn tracking ENOMEM, dma-debug disabled\n"); + global_disable = true; + } + + /* TODO: report -EEXIST errors here as overlapping mappings are + * not supported by the DMA API + */ } static struct dma_debug_entry *__dma_entry_alloc(void) @@ -469,6 +627,8 @@ static void dma_entry_free(struct dma_debug_entry *entry) { unsigned long flags; + active_pfn_remove(entry); + /* * add to beginning of the list - this way the entries are * more likely cache hot when they are reallocated. @@ -895,15 +1055,15 @@ static void check_unmap(struct dma_debug_entry *ref) ref->dev_addr, ref->size, type2name[entry->type], type2name[ref->type]); } else if ((entry->type == dma_debug_coherent) && - (ref->paddr != entry->paddr)) { + (phys_addr(ref) != phys_addr(entry))) { err_printk(ref->dev, entry, "DMA-API: device driver frees " "DMA memory with different CPU address " "[device address=0x%016llx] [size=%llu bytes] " "[cpu alloc address=0x%016llx] " "[cpu free address=0x%016llx]", ref->dev_addr, ref->size, - (unsigned long long)entry->paddr, - (unsigned long long)ref->paddr); + phys_addr(entry), + phys_addr(ref)); } if (ref->sg_call_ents && ref->type == dma_debug_sg && @@ -1052,7 +1212,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, entry->dev = dev; entry->type = dma_debug_page; - entry->paddr = page_to_phys(page) + offset; + entry->pfn = page_to_pfn(page); + entry->offset = offset, entry->dev_addr = dma_addr; entry->size = size; entry->direction = direction; @@ -1148,7 +1309,8 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, entry->type = dma_debug_sg; entry->dev = dev; - entry->paddr = sg_phys(s); + entry->pfn = page_to_pfn(sg_page(s)); + entry->offset = s->offset, entry->size = sg_dma_len(s); entry->dev_addr = sg_dma_address(s); entry->direction = direction; @@ -1198,7 +1360,8 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .paddr = sg_phys(s), + .pfn = page_to_pfn(sg_page(s)), + .offset = s->offset, .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = dir, @@ -1233,7 +1396,8 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size, entry->type = dma_debug_coherent; entry->dev = dev; - entry->paddr = virt_to_phys(virt); + entry->pfn = page_to_pfn(virt_to_page(virt)); + entry->offset = (size_t) virt & PAGE_MASK; entry->size = size; entry->dev_addr = dma_addr; entry->direction = DMA_BIDIRECTIONAL; @@ -1248,7 +1412,8 @@ void debug_dma_free_coherent(struct device *dev, size_t size, struct dma_debug_entry ref = { .type = dma_debug_coherent, .dev = dev, - .paddr = virt_to_phys(virt), + .pfn = page_to_pfn(virt_to_page(virt)), + .offset = (size_t) virt & PAGE_MASK, .dev_addr = addr, .size = size, .direction = DMA_BIDIRECTIONAL, @@ -1356,7 +1521,8 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .paddr = sg_phys(s), + .pfn = page_to_pfn(sg_page(s)), + .offset = s->offset, .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = direction, @@ -1388,7 +1554,8 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .paddr = sg_phys(s), + .pfn = page_to_pfn(sg_page(s)), + .offset = s->offset, .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = direction, -- cgit v1.2.3 From aec6a8889a98a0cd58357cd0937a25189908f191 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 21 Jan 2014 15:49:13 -0800 Subject: mm, show_mem: remove SHOW_MEM_FILTER_PAGE_COUNT Commit 4b59e6c47309 ("mm, show_mem: suppress page counts in non-blockable contexts") introduced SHOW_MEM_FILTER_PAGE_COUNT to suppress PFN walks on large memory machines. Commit c78e93630d15 ("mm: do not walk all of system memory during show_mem") avoided a PFN walk in the generic show_mem helper which removes the requirement for SHOW_MEM_FILTER_PAGE_COUNT in that case. This patch removes PFN walkers from the arch-specific implementations that report on a per-node or per-zone granularity. ARM and unicore32 still do a PFN walk as they report memory usage on each bank which is a much finer granularity where the debugging information may still be of use. As the remaining arches doing PFN walks have relatively small amounts of memory, this patch simply removes SHOW_MEM_FILTER_PAGE_COUNT. [akpm@linux-foundation.org: fix parisc] Signed-off-by: Mel Gorman Acked-by: David Rientjes Cc: Tony Luck Cc: Russell King Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/show_mem.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'lib') diff --git a/lib/show_mem.c b/lib/show_mem.c index 5847a4921b8e..f58689f5a24e 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -17,9 +17,6 @@ void show_mem(unsigned int filter) printk("Mem-Info:\n"); show_free_areas(filter); - if (filter & SHOW_MEM_FILTER_PAGE_COUNT) - return; - for_each_online_pgdat(pgdat) { unsigned long flags; int zoneid; -- cgit v1.2.3 From 457ff1de2d247d9b8917c4664c2325321a35e313 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Tue, 21 Jan 2014 15:50:30 -0800 Subject: lib/swiotlb.c: use memblock apis for early memory allocations Switch to memblock interfaces for early memory allocator instead of bootmem allocator. No functional change in beahvior than what it is in current code from bootmem users points of view. Archs already converted to NO_BOOTMEM now directly use memblock interfaces instead of bootmem wrappers build on top of memblock. And the archs which still uses bootmem, these new apis just fallback to exiting bootmem APIs. Signed-off-by: Santosh Shilimkar Cc: "Rafael J. Wysocki" Cc: Arnd Bergmann Cc: Christoph Lameter Cc: Greg Kroah-Hartman Cc: Grygorii Strashko Cc: H. Peter Anvin Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: Konrad Rzeszutek Wilk Cc: Michal Hocko Cc: Paul Walmsley Cc: Pavel Machek Cc: Russell King Cc: Tejun Heo Cc: Tony Lindgren Cc: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/swiotlb.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index e4399fa65ad6..615f3de4b5ce 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -172,8 +172,9 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) /* * Get the overflow emergency buffer */ - v_overflow_buffer = alloc_bootmem_low_pages_nopanic( - PAGE_ALIGN(io_tlb_overflow)); + v_overflow_buffer = memblock_virt_alloc_nopanic( + PAGE_ALIGN(io_tlb_overflow), + PAGE_SIZE); if (!v_overflow_buffer) return -ENOMEM; @@ -184,11 +185,15 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE * between io_tlb_start and io_tlb_end. */ - io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); + io_tlb_list = memblock_virt_alloc( + PAGE_ALIGN(io_tlb_nslabs * sizeof(int)), + PAGE_SIZE); for (i = 0; i < io_tlb_nslabs; i++) io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); io_tlb_index = 0; - io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); + io_tlb_orig_addr = memblock_virt_alloc( + PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)), + PAGE_SIZE); if (verbose) swiotlb_print_info(); @@ -215,13 +220,13 @@ swiotlb_init(int verbose) bytes = io_tlb_nslabs << IO_TLB_SHIFT; /* Get IO TLB memory from the low pages */ - vstart = alloc_bootmem_low_pages_nopanic(PAGE_ALIGN(bytes)); + vstart = memblock_virt_alloc_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) return; if (io_tlb_start) - free_bootmem(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + memblock_free_early(io_tlb_start, + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); pr_warn("Cannot allocate SWIOTLB buffer"); no_iotlb_memory = true; } @@ -357,14 +362,14 @@ void __init swiotlb_free(void) free_pages((unsigned long)phys_to_virt(io_tlb_start), get_order(io_tlb_nslabs << IO_TLB_SHIFT)); } else { - free_bootmem_late(io_tlb_overflow_buffer, - PAGE_ALIGN(io_tlb_overflow)); - free_bootmem_late(__pa(io_tlb_orig_addr), - PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); - free_bootmem_late(__pa(io_tlb_list), - PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); - free_bootmem_late(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + memblock_free_late(io_tlb_overflow_buffer, + PAGE_ALIGN(io_tlb_overflow)); + memblock_free_late(__pa(io_tlb_orig_addr), + PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); + memblock_free_late(__pa(io_tlb_list), + PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); + memblock_free_late(io_tlb_start, + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); } io_tlb_nslabs = 0; } -- cgit v1.2.3 From c15295001aa940df4e3cf6574808a4addca9f2e5 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Tue, 21 Jan 2014 15:50:32 -0800 Subject: lib/cpumask.c: use memblock apis for early memory allocations Switch to memblock interfaces for early memory allocator instead of bootmem allocator. No functional change in beahvior than what it is in current code from bootmem users points of view. Archs already converted to NO_BOOTMEM now directly use memblock interfaces instead of bootmem wrappers build on top of memblock. And the archs which still uses bootmem, these new apis just fallback to exiting bootmem APIs. Signed-off-by: Santosh Shilimkar Cc: "Rafael J. Wysocki" Cc: Arnd Bergmann Cc: Christoph Lameter Cc: Greg Kroah-Hartman Cc: Grygorii Strashko Cc: H. Peter Anvin Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: Konrad Rzeszutek Wilk Cc: Michal Hocko Cc: Paul Walmsley Cc: Pavel Machek Cc: Russell King Cc: Tejun Heo Cc: Tony Lindgren Cc: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/cpumask.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/cpumask.c b/lib/cpumask.c index d327b87c99b7..b810b753c607 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -140,7 +140,7 @@ EXPORT_SYMBOL(zalloc_cpumask_var); */ void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) { - *mask = alloc_bootmem(cpumask_size()); + *mask = memblock_virt_alloc(cpumask_size(), 0); } /** @@ -161,6 +161,6 @@ EXPORT_SYMBOL(free_cpumask_var); */ void __init free_bootmem_cpumask_var(cpumask_var_t mask) { - free_bootmem(__pa(mask), cpumask_size()); + memblock_free_early(__pa(mask), cpumask_size()); } #endif -- cgit v1.2.3 From 25487d73a9670d911530eee2e31c20889ba117db Mon Sep 17 00:00:00 2001 From: Xishi Qiu Date: Tue, 21 Jan 2014 15:50:57 -0800 Subject: lib/show_mem.c: show num_poisoned_pages when oom Show num_poisoned_pages when oom, it is a little helpful to find the reason. Also it will be emitted anytime show_mem() is called. Signed-off-by: Xishi Qiu Suggested-by: Naoya Horiguchi Acked-by: Michal Hocko Acked-by: David Rientjes Reviewed-by: Wanpeng Li Acked-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/show_mem.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/show_mem.c b/lib/show_mem.c index f58689f5a24e..09225796991a 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -43,4 +43,7 @@ void show_mem(unsigned int filter) printk("%lu pages in pagetable cache\n", quicklist_total_size()); #endif +#ifdef CONFIG_MEMORY_FAILURE + printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages)); +#endif } -- cgit v1.2.3 From 809fa972fd90ff27225294b17a027e908b2d7b7a Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 22 Jan 2014 02:29:41 +0100 Subject: reciprocal_divide: update/correction of the algorithm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jakub Zawadzki noticed that some divisions by reciprocal_divide() were not correct [1][2], which he could also show with BPF code after divisions are transformed into reciprocal_value() for runtime invariance which can be passed to reciprocal_divide() later on; reverse in BPF dump ended up with a different, off-by-one K in some situations. This has been fixed by Eric Dumazet in commit aee636c4809fa5 ("bpf: do not use reciprocal divide"). This follow-up patch improves reciprocal_value() and reciprocal_divide() to work in all cases by using Granlund and Montgomery method, so that also future use is safe and without any non-obvious side-effects. Known problems with the old implementation were that division by 1 always returned 0 and some off-by-ones when the dividend and divisor where very large. This seemed to not be problematic with its current users, as far as we can tell. Eric Dumazet checked for the slab usage, we cannot surely say so in the case of flex_array. Still, in order to fix that, we propose an extension from the original implementation from commit 6a2d7a955d8d resp. [3][4], by using the algorithm proposed in "Division by Invariant Integers Using Multiplication" [5], Torbjörn Granlund and Peter L. Montgomery, that is, pseudocode for q = n/d where q, n, d is in u32 universe: 1) Initialization: int l = ceil(log_2 d) uword m' = floor((1<<32)*((1<>32 q = (t+((n-t)>>sh_1))>>sh_2 The assembler implementation from Agner Fog [6] also helped a lot while implementing. We have tested the implementation on x86_64, ppc64, i686, s390x; on x86_64/haswell we're still half the latency compared to normal divide. Joint work with Daniel Borkmann. [1] http://www.wireshark.org/~darkjames/reciprocal-buggy.c [2] http://www.wireshark.org/~darkjames/set-and-dump-filter-k-bug.c [3] https://gmplib.org/~tege/division-paper.pdf [4] http://homepage.cs.uiowa.edu/~jones/bcd/divide.html [5] http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.2556 [6] http://www.agner.org/optimize/asmlib.zip Reported-by: Jakub Zawadzki Cc: Eric Dumazet Cc: Austin S Hemmelgarn Cc: linux-kernel@vger.kernel.org Cc: Jesse Gross Cc: Jamal Hadi Salim Cc: Stephen Hemminger Cc: Matt Mackall Cc: Pekka Enberg Cc: Christoph Lameter Cc: Andy Gospodarek Cc: Veaceslav Falico Cc: Jay Vosburgh Cc: Jakub Zawadzki Signed-off-by: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- lib/flex_array.c | 7 ++++++- lib/reciprocal_div.c | 24 ++++++++++++++++++++---- 2 files changed, 26 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/flex_array.c b/lib/flex_array.c index 6948a6692fc4..2eed22fa507c 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -90,8 +90,8 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total, { struct flex_array *ret; int elems_per_part = 0; - int reciprocal_elems = 0; int max_size = 0; + struct reciprocal_value reciprocal_elems = { 0 }; if (element_size) { elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size); @@ -119,6 +119,11 @@ EXPORT_SYMBOL(flex_array_alloc); static int fa_element_to_part_nr(struct flex_array *fa, unsigned int element_nr) { + /* + * if element_size == 0 we don't get here, so we never touch + * the zeroed fa->reciprocal_elems, which would yield invalid + * results + */ return reciprocal_divide(element_nr, fa->reciprocal_elems); } diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c index 75510e94f7d0..464152410c51 100644 --- a/lib/reciprocal_div.c +++ b/lib/reciprocal_div.c @@ -1,11 +1,27 @@ +#include #include #include #include -u32 reciprocal_value(u32 k) +/* + * For a description of the algorithm please have a look at + * include/linux/reciprocal_div.h + */ + +struct reciprocal_value reciprocal_value(u32 d) { - u64 val = (1LL << 32) + (k - 1); - do_div(val, k); - return (u32)val; + struct reciprocal_value R; + u64 m; + int l; + + l = fls(d - 1); + m = ((1ULL << 32) * ((1ULL << l) - d)); + do_div(m, d); + ++m; + R.m = (u32)m; + R.sh1 = min(l, 1); + R.sh2 = max(l - 1, 0); + + return R; } EXPORT_SYMBOL(reciprocal_value); -- cgit v1.2.3 From 30b02c4b2a1eb6b6ca83e31c6b65f63076a03c5b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 23 Jan 2014 13:24:09 +0000 Subject: assoc_array: remove global variable The associative array code creates unnecessary and potentially problematic global variable 'status'. Remove it since never used. Signed-off-by: Stephen Hemminger Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- lib/assoc_array.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 1b6a44f1ec3e..c0b1007011e1 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -157,7 +157,7 @@ enum assoc_array_walk_status { assoc_array_walk_tree_empty, assoc_array_walk_found_terminal_node, assoc_array_walk_found_wrong_shortcut, -} status; +}; struct assoc_array_walk_result { struct { -- cgit v1.2.3 From 6f6b5d1ec56acdeab0503d2b823f6f88a0af493e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 19 Jan 2014 08:26:37 +0000 Subject: percpu_ida: Make percpu_ida_alloc + callers accept task state bitmask This patch changes percpu_ida_alloc() + callers to accept task state bitmask for prepare_to_wait() for code like target/iscsi that needs it for interruptible sleep, that is provided in a subsequent patch. It now expects TASK_UNINTERRUPTIBLE when the caller is able to sleep waiting for a new tag, or TASK_RUNNING when the caller cannot sleep, and is forced to return a negative value when no tags are available. v2 changes: - Include blk-mq + tcm_fc + vhost/scsi + target/iscsi changes - Drop signal_pending_state() call v3 changes: - Only call prepare_to_wait() + finish_wait() when != TASK_RUNNING (PeterZ) Reported-by: Linus Torvalds Cc: Linus Torvalds Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Jens Axboe Signed-off-by: Kent Overstreet Cc: #3.12+ Signed-off-by: Nicholas Bellinger --- lib/percpu_ida.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index 9d054bf91d0f..58b671484ac2 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -132,22 +132,22 @@ static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags) /** * percpu_ida_alloc - allocate a tag * @pool: pool to allocate from - * @gfp: gfp flags + * @state: task state for prepare_to_wait * * Returns a tag - an integer in the range [0..nr_tags) (passed to * tag_pool_init()), or otherwise -ENOSPC on allocation failure. * * Safe to be called from interrupt context (assuming it isn't passed - * __GFP_WAIT, of course). + * TASK_UNINTERRUPTIBLE, of course). * * @gfp indicates whether or not to wait until a free id is available (it's not * used for internal memory allocations); thus if passed __GFP_WAIT we may sleep * however long it takes until another thread frees an id (same semantics as a * mempool). * - * Will not fail if passed __GFP_WAIT. + * Will not fail if passed TASK_UNINTERRUPTIBLE. */ -int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) +int percpu_ida_alloc(struct percpu_ida *pool, int state) { DEFINE_WAIT(wait); struct percpu_ida_cpu *tags; @@ -174,7 +174,8 @@ int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) * * global lock held and irqs disabled, don't need percpu lock */ - prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); + if (state != TASK_RUNNING) + prepare_to_wait(&pool->wait, &wait, state); if (!tags->nr_free) alloc_global_tags(pool, tags); @@ -191,7 +192,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) spin_unlock(&pool->lock); local_irq_restore(flags); - if (tag >= 0 || !(gfp & __GFP_WAIT)) + if (tag >= 0 || state == TASK_RUNNING) break; schedule(); @@ -199,8 +200,9 @@ int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) local_irq_save(flags); tags = this_cpu_ptr(pool->tag_cpu); } + if (state != TASK_RUNNING) + finish_wait(&pool->wait, &wait); - finish_wait(&pool->wait, &wait); return tag; } EXPORT_SYMBOL_GPL(percpu_ida_alloc); -- cgit v1.2.3 From aace05097a0fd467230e39acb148be0fdaa90068 Mon Sep 17 00:00:00 2001 From: "Du, Changbin" Date: Thu, 23 Jan 2014 15:54:12 -0800 Subject: lib/parser.c: add match_wildcard() function match_wildcard function is a simple implementation of wildcard matching algorithm. It only supports two usual wildcardes: '*' - matches zero or more characters '?' - matches one character This algorithm is safe since it is non-recursive. Signed-off-by: Du, Changbin Cc: Jason Baron Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/parser.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'lib') diff --git a/lib/parser.c b/lib/parser.c index 807b2aaa33fa..ee5295541cea 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -192,6 +192,56 @@ int match_hex(substring_t *s, int *result) return match_number(s, result, 16); } +/** + * match_wildcard: - parse if a string matches given wildcard pattern + * @pattern: wildcard pattern + * @str: the string to be parsed + * + * Description: Parse the string @str to check if matches wildcard + * pattern @pattern. The pattern may contain two type wildcardes: + * '*' - matches zero or more characters + * '?' - matches one character + * If it's matched, return true, else return false. + */ +bool match_wildcard(const char *pattern, const char *str) +{ + const char *s = str; + const char *p = pattern; + bool star = false; + + while (*s) { + switch (*p) { + case '?': + s++; + p++; + break; + case '*': + star = true; + str = s; + if (!*++p) + return true; + pattern = p; + break; + default: + if (*s == *p) { + s++; + p++; + } else { + if (!star) + return false; + str++; + s = str; + p = pattern; + } + break; + } + } + + if (*p == '*') + ++p; + return !*p; +} + /** * match_strlcpy: - Copy the characters from a substring_t to a sized buffer * @dest: where to copy to @@ -235,5 +285,6 @@ EXPORT_SYMBOL(match_token); EXPORT_SYMBOL(match_int); EXPORT_SYMBOL(match_octal); EXPORT_SYMBOL(match_hex); +EXPORT_SYMBOL(match_wildcard); EXPORT_SYMBOL(match_strlcpy); EXPORT_SYMBOL(match_strdup); -- cgit v1.2.3 From a3d2cca43cd31479d4f0414b4d014c4400a4e6d6 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 23 Jan 2014 15:54:13 -0800 Subject: lib/parser.c: put EXPORT_SYMBOLs in the conventional place Cc: Du, Changbin Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/parser.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/parser.c b/lib/parser.c index ee5295541cea..b6d11631231b 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -113,6 +113,7 @@ int match_token(char *s, const match_table_t table, substring_t args[]) return p->token; } +EXPORT_SYMBOL(match_token); /** * match_number: scan a number in the given base from a substring_t @@ -163,6 +164,7 @@ int match_int(substring_t *s, int *result) { return match_number(s, result, 0); } +EXPORT_SYMBOL(match_int); /** * match_octal: - scan an octal representation of an integer from a substring_t @@ -177,6 +179,7 @@ int match_octal(substring_t *s, int *result) { return match_number(s, result, 8); } +EXPORT_SYMBOL(match_octal); /** * match_hex: - scan a hex representation of an integer from a substring_t @@ -191,6 +194,7 @@ int match_hex(substring_t *s, int *result) { return match_number(s, result, 16); } +EXPORT_SYMBOL(match_hex); /** * match_wildcard: - parse if a string matches given wildcard pattern @@ -241,6 +245,7 @@ bool match_wildcard(const char *pattern, const char *str) ++p; return !*p; } +EXPORT_SYMBOL(match_wildcard); /** * match_strlcpy: - Copy the characters from a substring_t to a sized buffer @@ -263,6 +268,7 @@ size_t match_strlcpy(char *dest, const substring_t *src, size_t size) } return ret; } +EXPORT_SYMBOL(match_strlcpy); /** * match_strdup: - allocate a new string with the contents of a substring_t @@ -280,11 +286,4 @@ char *match_strdup(const substring_t *s) match_strlcpy(p, s, sz); return p; } - -EXPORT_SYMBOL(match_token); -EXPORT_SYMBOL(match_int); -EXPORT_SYMBOL(match_octal); -EXPORT_SYMBOL(match_hex); -EXPORT_SYMBOL(match_wildcard); -EXPORT_SYMBOL(match_strlcpy); EXPORT_SYMBOL(match_strdup); -- cgit v1.2.3 From 578b1e0701af34f9ef69daabda4431f1e8501109 Mon Sep 17 00:00:00 2001 From: "Du, Changbin" Date: Thu, 23 Jan 2014 15:54:14 -0800 Subject: dynamic_debug: add wildcard support to filter files/functions/modules Add wildcard '*'(matches zero or more characters) and '?' (matches one character) support when qurying debug flags. Now we can open debug messages using keywords. eg: 1. open debug logs in all usb drivers echo "file drivers/usb/* +p" > /dynamic_debug/control 2. open debug logs for usb xhci code echo "file *xhci* +p" > /dynamic_debug/control Signed-off-by: Du, Changbin Cc: Jason Baron Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dynamic_debug.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index c37aeacd7651..600ac57e2777 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -8,6 +8,7 @@ * By Greg Banks * Copyright (c) 2008 Silicon Graphics Inc. All Rights Reserved. * Copyright (C) 2011 Bart Van Assche. All Rights Reserved. + * Copyright (C) 2013 Du, Changbin */ #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ @@ -24,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -147,7 +149,8 @@ static int ddebug_change(const struct ddebug_query *query, list_for_each_entry(dt, &ddebug_tables, link) { /* match against the module name */ - if (query->module && strcmp(query->module, dt->mod_name)) + if (query->module && + !match_wildcard(query->module, dt->mod_name)) continue; for (i = 0; i < dt->num_ddebugs; i++) { @@ -155,14 +158,16 @@ static int ddebug_change(const struct ddebug_query *query, /* match against the source filename */ if (query->filename && - strcmp(query->filename, dp->filename) && - strcmp(query->filename, kbasename(dp->filename)) && - strcmp(query->filename, trim_prefix(dp->filename))) + !match_wildcard(query->filename, dp->filename) && + !match_wildcard(query->filename, + kbasename(dp->filename)) && + !match_wildcard(query->filename, + trim_prefix(dp->filename))) continue; /* match against the function */ if (query->function && - strcmp(query->function, dp->function)) + !match_wildcard(query->function, dp->function)) continue; /* match against the format */ -- cgit v1.2.3 From aaf07621b8bbfdc0d87e9e5dbf1af3b24304998a Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 23 Jan 2014 15:54:17 -0800 Subject: vsprintf: add %pad extension for dma_addr_t use dma_addr_t's can be either u32 or u64 depending on a CONFIG option. There are a few hundred dma_addr_t's printed via either cast to unsigned long long, unsigned long or no cast at all. Add %pad to be able to emit them without the cast. Update Documentation/printk-formats.txt too. Signed-off-by: Joe Perches Cc: "Shevchenko, Andriy" Cc: Rob Landley Cc: Laurent Pinchart Cc: Julia Lawall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 10909c571494..185b6d300ebc 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1155,6 +1155,30 @@ char *netdev_feature_string(char *buf, char *end, const u8 *addr, return number(buf, end, *(const netdev_features_t *)addr, spec); } +static noinline_for_stack +char *address_val(char *buf, char *end, const void *addr, + struct printf_spec spec, const char *fmt) +{ + unsigned long long num; + + spec.flags |= SPECIAL | SMALL | ZEROPAD; + spec.base = 16; + + switch (fmt[1]) { + case 'd': + num = *(const dma_addr_t *)addr; + spec.field_width = sizeof(dma_addr_t) * 2 + 2; + break; + case 'p': + default: + num = *(const phys_addr_t *)addr; + spec.field_width = sizeof(phys_addr_t) * 2 + 2; + break; + } + + return number(buf, end, num, spec); +} + int kptr_restrict __read_mostly; /* @@ -1218,7 +1242,8 @@ int kptr_restrict __read_mostly; * N no separator * The maximum supported length is 64 bytes of the input. Consider * to use print_hex_dump() for the larger input. - * - 'a' For a phys_addr_t type and its derivative types (passed by reference) + * - 'a[pd]' For address types [p] phys_addr_t, [d] dma_addr_t and derivatives + * (default assumed to be phys_addr_t, passed by reference) * - 'd[234]' For a dentry name (optionally 2-4 last components) * - 'D[234]' Same as 'd' but for a struct file * @@ -1353,11 +1378,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, } break; case 'a': - spec.flags |= SPECIAL | SMALL | ZEROPAD; - spec.field_width = sizeof(phys_addr_t) * 2 + 2; - spec.base = 16; - return number(buf, end, - (unsigned long long) *((phys_addr_t *)ptr), spec); + return address_val(buf, end, ptr, spec, fmt); case 'd': return dentry_name(buf, end, ptr, spec, fmt); case 'D': -- cgit v1.2.3 From ae2924a2bdc5255745e68f2b9206404ddadfc5bf Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Thu, 23 Jan 2014 15:54:34 -0800 Subject: lib/kstrtox.c: remove redundant cleanup We can't reach the cleanup code unless the flag KSTRTOX_OVERFLOW is not set, so there's not no point in clearing a bit that we know is not set. Signed-off-by: Felipe Contreras Acked-by: Levente Kurusa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/kstrtox.c | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/kstrtox.c b/lib/kstrtox.c index f78ae0c0c4e2..ec8da78df9be 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -92,7 +92,6 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) rv = _parse_integer(s, base, &_res); if (rv & KSTRTOX_OVERFLOW) return -ERANGE; - rv &= ~KSTRTOX_OVERFLOW; if (rv == 0) return -EINVAL; s += rv; -- cgit v1.2.3 From 9fd4305448a4639deade433893c5233a324df3a2 Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Thu, 23 Jan 2014 15:54:35 -0800 Subject: lib/cmdline.c: fix style issues WARNING: space prohibited between function name and open parenthesis '(' +int get_option (char **str, int *pint) WARNING: space prohibited between function name and open parenthesis '(' + *pint = simple_strtol (cur, str, 0); ERROR: trailing whitespace + $ WARNING: please, no spaces at the start of a line + $ WARNING: space prohibited between function name and open parenthesis '(' + res = get_option ((char **)&str, ints + i); Signed-off-by: Felipe Contreras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/cmdline.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/cmdline.c b/lib/cmdline.c index eb6791188cf5..54663335226f 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -49,13 +49,13 @@ static int get_range(char **str, int *pint) * 3 - hyphen found to denote a range */ -int get_option (char **str, int *pint) +int get_option(char **str, int *pint) { char *cur = *str; if (!cur || !(*cur)) return 0; - *pint = simple_strtol (cur, str, 0); + *pint = simple_strtol(cur, str, 0); if (cur == *str) return 0; if (**str == ',') { @@ -84,13 +84,13 @@ int get_option (char **str, int *pint) * the parse to end (typically a null terminator, if @str is * completely parseable). */ - + char *get_options(const char *str, int nints, int *ints) { int res, i = 1; while (i < nints) { - res = get_option ((char **)&str, ints + i); + res = get_option((char **)&str, ints + i); if (res == 0) break; if (res == 3) { @@ -153,7 +153,6 @@ unsigned long long memparse(const char *ptr, char **retptr) return ret; } - EXPORT_SYMBOL(memparse); EXPORT_SYMBOL(get_option); EXPORT_SYMBOL(get_options); -- cgit v1.2.3 From ff6f9bbb582c1cb00cbe7ecd96bcde229fd336f7 Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Thu, 23 Jan 2014 15:54:36 -0800 Subject: lib/cmdline.c: declare exported symbols immediately WARNING: EXPORT_SYMBOL(foo); should immediately follow its function/variable +EXPORT_SYMBOL(memparse); WARNING: EXPORT_SYMBOL(foo); should immediately follow its function/variable +EXPORT_SYMBOL(get_option); WARNING: EXPORT_SYMBOL(foo); should immediately follow its function/variable +EXPORT_SYMBOL(get_options); Signed-off-by: Felipe Contreras Cc: Levente Kurusa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/cmdline.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/cmdline.c b/lib/cmdline.c index 54663335226f..d4932f745e92 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -67,6 +67,7 @@ int get_option(char **str, int *pint) return 1; } +EXPORT_SYMBOL(get_option); /** * get_options - Parse a string into a list of integers @@ -112,6 +113,7 @@ char *get_options(const char *str, int nints, int *ints) ints[0] = i - 1; return (char *)str; } +EXPORT_SYMBOL(get_options); /** * memparse - parse a string with mem suffixes into a number @@ -152,7 +154,4 @@ unsigned long long memparse(const char *ptr, char **retptr) return ret; } - EXPORT_SYMBOL(memparse); -EXPORT_SYMBOL(get_option); -EXPORT_SYMBOL(get_options); -- cgit v1.2.3 From 93e9ef83f40603535ffe6b60498149e75f33aa8f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 23 Jan 2014 15:54:37 -0800 Subject: test: add minimal module for verification testing This is a pair of test modules I'd like to see in the tree. Instead of putting these in lkdtm, where I've been adding various tests that trigger crashes, these don't make sense there since they need to be either distinctly separate, or their pass/fail state don't need to crash the machine. These live in lib/ for now, along with a few other in-kernel test modules, and use the slightly more common "test_" naming convention, instead of "test-". We should likely standardize on the former: $ find . -name 'test_*.c' | grep -v /tools/ | wc -l 4 $ find . -name 'test-*.c' | grep -v /tools/ | wc -l 2 The first is entirely a no-op module, designed to allow simple testing of the module loading and verification interface. It's useful to have a module that has no other uses or dependencies so it can be reliably used for just testing module loading and verification. The second is a module that exercises the user memory access functions, in an effort to make sure that we can quickly catch any regressions in boundary checking (e.g. like what was recently fixed on ARM). This patch (of 2): When doing module loading verification tests (for example, with module signing, or LSM hooks), it is very handy to have a module that can be built on all systems under test, isn't auto-loaded at boot, and has no device or similar dependencies. This creates the "test_module.ko" module for that purpose, which only reports its load and unload to printk. Signed-off-by: Kees Cook Acked-by: Rusty Russell Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 14 ++++++++++++++ lib/Makefile | 1 + lib/test_module.c | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) create mode 100644 lib/test_module.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 900b63c1e899..7e37a36b6913 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1595,6 +1595,20 @@ config DMA_API_DEBUG If unsure, say N. +config TEST_MODULE + tristate "Test module loading with 'hello world' module" + default n + depends on m + help + This builds the "test_module" module that emits "Hello, world" + on printk when loaded. It is designed to be used for basic + evaluation of the module loading subsystem (for example when + validating module verification). It lacks any extra dependencies, + and will not normally be loaded by the system unless explicitly + requested by name. + + If unsure, say N. + source "samples/Kconfig" source "lib/Kconfig.kgdb" diff --git a/lib/Makefile b/lib/Makefile index a459c31e8c6b..b494b9af631c 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -31,6 +31,7 @@ obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o +obj-$(CONFIG_TEST_MODULE) += test_module.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/test_module.c b/lib/test_module.c new file mode 100644 index 000000000000..319b66f1ff61 --- /dev/null +++ b/lib/test_module.c @@ -0,0 +1,33 @@ +/* + * This module emits "Hello, world" on printk when loaded. + * + * It is designed to be used for basic evaluation of the module loading + * subsystem (for example when validating module signing/verification). It + * lacks any extra dependencies, and will not normally be loaded by the + * system unless explicitly requested by name. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +static int __init test_module_init(void) +{ + pr_warn("Hello, world\n"); + + return 0; +} + +module_init(test_module_init); + +static void __exit test_module_exit(void) +{ + pr_warn("Goodbye\n"); +} + +module_exit(test_module_exit); + +MODULE_AUTHOR("Kees Cook "); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 3e2a4c183ace8708c69f589505fb82bb63010ade Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 23 Jan 2014 15:54:38 -0800 Subject: test: check copy_to/from_user boundary validation To help avoid an architecture failing to correctly check kernel/user boundaries when handling copy_to_user, copy_from_user, put_user, or get_user, perform some simple tests and fail to load if any of them behave unexpectedly. Specifically, this is to make sure there is a way to notice if things like what was fixed in commit 8404663f81d2 ("ARM: 7527/1: uaccess: explicitly check __user pointer when !CPU_USE_DOMAINS") ever regresses again, for any architecture. Additionally, adds new "user" selftest target, which loads this module. Signed-off-by: Kees Cook Cc: Rusty Russell Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 13 ++++++ lib/Makefile | 1 + lib/test_user_copy.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+) create mode 100644 lib/test_user_copy.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7e37a36b6913..e0e2eebf7ab3 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1609,6 +1609,19 @@ config TEST_MODULE If unsure, say N. +config TEST_USER_COPY + tristate "Test user/kernel boundary protections" + default n + depends on m + help + This builds the "test_user_copy" module that runs sanity checks + on the copy_to/from_user infrastructure, making sure basic + user/kernel boundary testing is working. If it fails to load, + a regression has been detected in the user/kernel memory boundary + protections. + + If unsure, say N. + source "samples/Kconfig" source "lib/Kconfig.kgdb" diff --git a/lib/Makefile b/lib/Makefile index b494b9af631c..98ec3b861062 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o obj-$(CONFIG_TEST_MODULE) += test_module.o +obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c new file mode 100644 index 000000000000..0ecef3e4690e --- /dev/null +++ b/lib/test_user_copy.c @@ -0,0 +1,110 @@ +/* + * Kernel module for testing copy_to/from_user infrastructure. + * + * Copyright 2013 Google Inc. All Rights Reserved + * + * Authors: + * Kees Cook + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include + +#define test(condition, msg) \ +({ \ + int cond = (condition); \ + if (cond) \ + pr_warn("%s\n", msg); \ + cond; \ +}) + +static int __init test_user_copy_init(void) +{ + int ret = 0; + char *kmem; + char __user *usermem; + char *bad_usermem; + unsigned long user_addr; + unsigned long value = 0x5A; + + kmem = kmalloc(PAGE_SIZE * 2, GFP_KERNEL); + if (!kmem) + return -ENOMEM; + + user_addr = vm_mmap(NULL, 0, PAGE_SIZE * 2, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (user_addr >= (unsigned long)(TASK_SIZE)) { + pr_warn("Failed to allocate user memory\n"); + kfree(kmem); + return -ENOMEM; + } + + usermem = (char __user *)user_addr; + bad_usermem = (char *)user_addr; + + /* Legitimate usage: none of these should fail. */ + ret |= test(copy_from_user(kmem, usermem, PAGE_SIZE), + "legitimate copy_from_user failed"); + ret |= test(copy_to_user(usermem, kmem, PAGE_SIZE), + "legitimate copy_to_user failed"); + ret |= test(get_user(value, (unsigned long __user *)usermem), + "legitimate get_user failed"); + ret |= test(put_user(value, (unsigned long __user *)usermem), + "legitimate put_user failed"); + + /* Invalid usage: none of these should succeed. */ + ret |= test(!copy_from_user(kmem, (char __user *)(kmem + PAGE_SIZE), + PAGE_SIZE), + "illegal all-kernel copy_from_user passed"); + ret |= test(!copy_from_user(bad_usermem, (char __user *)kmem, + PAGE_SIZE), + "illegal reversed copy_from_user passed"); + ret |= test(!copy_to_user((char __user *)kmem, kmem + PAGE_SIZE, + PAGE_SIZE), + "illegal all-kernel copy_to_user passed"); + ret |= test(!copy_to_user((char __user *)kmem, bad_usermem, + PAGE_SIZE), + "illegal reversed copy_to_user passed"); + ret |= test(!get_user(value, (unsigned long __user *)kmem), + "illegal get_user passed"); + ret |= test(!put_user(value, (unsigned long __user *)kmem), + "illegal put_user passed"); + + vm_munmap(user_addr, PAGE_SIZE * 2); + kfree(kmem); + + if (ret == 0) { + pr_info("tests passed.\n"); + return 0; + } + + return -EINVAL; +} + +module_init(test_user_copy_init); + +static void __exit test_user_copy_exit(void) +{ + pr_info("unloaded.\n"); +} + +module_exit(test_user_copy_exit); + +MODULE_AUTHOR("Kees Cook "); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From dbf128cbf9b90f97d74c734d1a768c564958e970 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Thu, 23 Jan 2014 15:56:05 -0800 Subject: rbtree/test: move rb_node to the middle of the test struct Avoid making the rb_node the first entry to catch some bugs around NULL checking the rb_node. Signed-off-by: Cody P Schafer Cc: Michel Lespinasse Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/rbtree_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 31dd4ccd3baa..df6c125ff5c2 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -8,8 +8,8 @@ #define CHECK_LOOPS 100 struct test_node { - struct rb_node rb; u32 key; + struct rb_node rb; /* following fields used for testing augmented rbtree functionality */ u32 val; -- cgit v1.2.3 From 964fe94d71b771c8801134407ad8676874bb589e Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Thu, 23 Jan 2014 15:56:06 -0800 Subject: rbtree/test: test rbtree_postorder_for_each_entry_safe() Signed-off-by: Cody P Schafer Cc: Michel Lespinasse Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/rbtree_test.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'lib') diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index df6c125ff5c2..8b3c9dc88262 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -114,6 +114,16 @@ static int black_path_count(struct rb_node *rb) return count; } +static void check_postorder_foreach(int nr_nodes) +{ + struct test_node *cur, *n; + int count = 0; + rbtree_postorder_for_each_entry_safe(cur, n, &root, rb) + count++; + + WARN_ON_ONCE(count != nr_nodes); +} + static void check_postorder(int nr_nodes) { struct rb_node *rb; @@ -148,6 +158,7 @@ static void check(int nr_nodes) WARN_ON_ONCE(count < (1 << black_path_count(rb_last(&root))) - 1); check_postorder(nr_nodes); + check_postorder_foreach(nr_nodes); } static void check_augmented(int nr_nodes) -- cgit v1.2.3 From 2a1d689c9ba42a6066540fb221b6ecbd6298b728 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 23 Jan 2014 15:56:20 -0800 Subject: lib/decompress_unlz4.c: always set an error return code on failures "ret", being set to -1 early on, gets cleared by the first invocation of lz4_decompress()/lz4_decompress_unknownoutputsize(), and hence subsequent failures wouldn't be noticed by the caller without setting it back to -1 right after those calls. Reported-by: Matthew Daley Signed-off-by: Jan Beulich Cc: Kyungsik Lee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/decompress_unlz4.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c index 3e67cfad16ad..7d1e83caf8ad 100644 --- a/lib/decompress_unlz4.c +++ b/lib/decompress_unlz4.c @@ -141,6 +141,7 @@ STATIC inline int INIT unlz4(u8 *input, int in_len, goto exit_2; } + ret = -1; if (flush && flush(outp, dest_len) != dest_len) goto exit_2; if (output) -- cgit v1.2.3 From 555b270e25b0279b98083518a85f4b1da144a181 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 20 Jan 2014 03:36:24 +0000 Subject: iscsi-target: Fix connection reset hang with percpu_ida_alloc This patch addresses a bug where connection reset would hang indefinately once percpu_ida_alloc() was starved for tags, due to the fact that it always assumed uninterruptible sleep mode. So now make percpu_ida_alloc() check for signal_pending_state() for making interruptible sleep optional, and convert iscsit_allocate_cmd() to set TASK_INTERRUPTIBLE for GFP_KERNEL, or TASK_RUNNING for GFP_ATOMIC. Reported-by: Linus Torvalds Cc: Kent Overstreet Cc: #3.12+ Signed-off-by: Nicholas Bellinger --- lib/percpu_ida.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index 58b671484ac2..7be235f1a70b 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -138,14 +138,14 @@ static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags) * tag_pool_init()), or otherwise -ENOSPC on allocation failure. * * Safe to be called from interrupt context (assuming it isn't passed - * TASK_UNINTERRUPTIBLE, of course). + * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course). * * @gfp indicates whether or not to wait until a free id is available (it's not * used for internal memory allocations); thus if passed __GFP_WAIT we may sleep * however long it takes until another thread frees an id (same semantics as a * mempool). * - * Will not fail if passed TASK_UNINTERRUPTIBLE. + * Will not fail if passed TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE. */ int percpu_ida_alloc(struct percpu_ida *pool, int state) { @@ -195,6 +195,11 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) if (tag >= 0 || state == TASK_RUNNING) break; + if (signal_pending_state(state, current)) { + tag = -ERESTARTSYS; + break; + } + schedule(); local_irq_save(flags); -- cgit v1.2.3 From ad6492b80f60a2139fa9bf8fd79b182fe5e3647c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Jan 2014 17:06:49 -0800 Subject: memblock, nobootmem: add memblock_virt_alloc_low() The new memblock_virt APIs are used to replaced old bootmem API. We need to allocate page below 4G for swiotlb. That should fix regression on Andrew's system that is using swiotlb. Signed-off-by: Yinghai Lu Cc: Russell King Cc: Konrad Rzeszutek Wilk Acked-by: Santosh Shilimkar Cc: Dave Hansen Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/swiotlb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 2e1c102759ce..b604b831f4d1 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -172,7 +172,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) /* * Get the overflow emergency buffer */ - v_overflow_buffer = memblock_virt_alloc_nopanic( + v_overflow_buffer = memblock_virt_alloc_low_nopanic( PAGE_ALIGN(io_tlb_overflow), PAGE_SIZE); if (!v_overflow_buffer) @@ -220,7 +220,7 @@ swiotlb_init(int verbose) bytes = io_tlb_nslabs << IO_TLB_SHIFT; /* Get IO TLB memory from the low pages */ - vstart = memblock_virt_alloc_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); + vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) return; -- cgit v1.2.3 From d9e133e6f05fbb39e2ecf7bc1edca299729a8595 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 27 Jan 2014 17:06:57 -0800 Subject: dynamic_debug: remove wrong error message parse_lineno() returns either negative error code or zero. We don't need to print something here because if parse_lineno fails it will print error message. Signed-off-by: Andrey Ryabinin Cc: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dynamic_debug.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 600ac57e2777..f959c39cc007 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -348,10 +348,8 @@ static int ddebug_parse_query(char *words[], int nwords, } if (last) *last++ = '\0'; - if (parse_lineno(first, &query->first_lineno) < 0) { - pr_err("line-number is <0\n"); + if (parse_lineno(first, &query->first_lineno) < 0) return -EINVAL; - } if (last) { /* range - */ if (parse_lineno(last, &query->last_lineno) -- cgit v1.2.3 From 3ace678fd1b246b75e01eeac0554de35656136a4 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 27 Jan 2014 17:06:58 -0800 Subject: dynamic_debug: fix ddebug_parse_query() This fixes following scenario: $ echo 'file dynamic_debug.c line 1-123 +p' > /sys/kernel/debug/dynamic_debug/control -bash: echo: write error: Invalid argument $ dmesg | grep dynamic_debug dynamic_debug:ddebug_parse_query: last-line:123 < 1st-line:1 dynamic_debug:ddebug_parse_query: query parse failed Signed-off-by: Andrey Ryabinin Cc: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dynamic_debug.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index f959c39cc007..e488d9a03adc 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -352,8 +352,10 @@ static int ddebug_parse_query(char *words[], int nwords, return -EINVAL; if (last) { /* range - */ - if (parse_lineno(last, &query->last_lineno) - < query->first_lineno) { + if (parse_lineno(last, &query->last_lineno) < 0) + return -EINVAL; + + if (query->last_lineno < query->first_lineno) { pr_err("last-line:%d < 1st-line:%d\n", query->last_lineno, query->first_lineno); -- cgit v1.2.3 From 4592599af36f50ed2d3502ed1b2374f5af6cb1ae Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 27 Jan 2014 17:06:59 -0800 Subject: dynamic_debug: replace obselete simple_strtoul() with kstrtouint() Signed-off-by: Andrey Ryabinin Cc: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dynamic_debug.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index e488d9a03adc..7288e38e1757 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -268,14 +268,12 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) */ static inline int parse_lineno(const char *str, unsigned int *val) { - char *end = NULL; BUG_ON(str == NULL); if (*str == '\0') { *val = 0; return 0; } - *val = simple_strtoul(str, &end, 10); - if (end == NULL || end == str || *end != '\0') { + if (kstrtouint(str, 10, val) < 0) { pr_err("bad line-number: %s\n", str); return -EINVAL; } -- cgit v1.2.3 From 29dfe2dc0e8f85c5656d13bb4c78a5ffca54c452 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 1 Nov 2013 13:06:56 -0400 Subject: kobject: export kobj_sysfs_ops struct kobj_attribute implements the baseline attribute functionality that can be used all over the place. We should export the ops associated with it. Signed-off-by: Jeff Mahoney Signed-off-by: Greg Kroah-Hartman Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- lib/kobject.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 5b4b8886435e..03512a40a3ef 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -758,6 +758,7 @@ const struct sysfs_ops kobj_sysfs_ops = { .show = kobj_attr_show, .store = kobj_attr_store, }; +EXPORT_SYMBOL_GPL(kobj_sysfs_ops); /** * kobj_completion_init - initialize a kobj_completion object. -- cgit v1.2.3 From 0368dfd01ae3b7647ef9b2f0525fdefd5e0d28e1 Mon Sep 17 00:00:00 2001 From: "Lad, Prabhakar" Date: Wed, 29 Jan 2014 14:05:37 -0800 Subject: lib/genalloc.c: add check gen_pool_dma_alloc() if dma pointer is not NULL In the gen_pool_dma_alloc() the dma pointer can be NULL and while assigning gen_pool_virt_to_phys(pool, vaddr) to dma caused the following crash on da850 evm: Unable to handle kernel NULL pointer dereference at virtual address 00000000 Internal error: Oops: 805 [#1] PREEMPT ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper Tainted: G W 3.13.0-rc1-00001-g0609e45-dirty #5 task: c4830000 ti: c4832000 task.ti: c4832000 PC is at gen_pool_dma_alloc+0x30/0x3c LR is at gen_pool_virt_to_phys+0x74/0x80 Process swapper, call trace: gen_pool_dma_alloc+0x30/0x3c davinci_pm_probe+0x40/0xa8 platform_drv_probe+0x1c/0x4c driver_probe_device+0x98/0x22c __driver_attach+0x8c/0x90 bus_for_each_dev+0x6c/0x8c bus_add_driver+0x124/0x1d4 driver_register+0x78/0xf8 platform_driver_probe+0x20/0xa4 davinci_init_late+0xc/0x14 init_machine_late+0x1c/0x28 do_one_initcall+0x34/0x15c kernel_init_freeable+0xe4/0x1ac kernel_init+0x8/0xec This patch fixes the above. [akpm@linux-foundation.org: update kerneldoc] Signed-off-by: Lad, Prabhakar Cc: Philipp Zabel Cc: Nicolin Chen Cc: Joe Perches Cc: Sachin Kamat Cc: [3.13.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/genalloc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/genalloc.c b/lib/genalloc.c index dda31168844f..bdb9a456bcbb 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -316,7 +316,7 @@ EXPORT_SYMBOL(gen_pool_alloc); * gen_pool_dma_alloc - allocate special memory from the pool for DMA usage * @pool: pool to allocate from * @size: number of bytes to allocate from the pool - * @dma: dma-view physical address + * @dma: dma-view physical address return value. Use NULL if unneeded. * * Allocate the requested number of bytes from the specified pool. * Uses the pool allocation function (with first-fit algorithm by default). @@ -334,7 +334,8 @@ void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma) if (!vaddr) return NULL; - *dma = gen_pool_virt_to_phys(pool, vaddr); + if (dma) + *dma = gen_pool_virt_to_phys(pool, vaddr); return (void *)vaddr; } -- cgit v1.2.3 From 59f2e7df574c78e952d79435de3f4867349403aa Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Jan 2014 14:05:53 -0800 Subject: dma-debug: fix overlap detection Commit 0abdd7a81b7e ("dma-debug: introduce debug_dma_assert_idle()") was reworked to expand the overlap counter to the full range expressable by 3 tag bits, but it has a thinko in treating the overlap counter as a pure reference count for the entry. Instead of deleting when the reference-count drops to zero, we need to delete when the overlap-count drops below zero. Also, when detecting overflow we can just test the overlap-count > MAX rather than applying special meaning to 0. Regression report available here: http://marc.info/?l=linux-netdev&m=139073373932386&w=2 This patch, now tested on the original net_dma case, sees the expected handful of reports before the eventual data corruption occurs. Signed-off-by: Dan Williams Reported-by: Sander Eikelenboom Cc: Francois Romieu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dma-debug.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/dma-debug.c b/lib/dma-debug.c index c38083871f11..2defd1308b04 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -463,7 +463,7 @@ static int active_pfn_set_overlap(unsigned long pfn, int overlap) int i; if (overlap > ACTIVE_PFN_MAX_OVERLAP || overlap < 0) - return 0; + return overlap; for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) if (overlap & 1 << i) @@ -486,7 +486,7 @@ static void active_pfn_inc_overlap(unsigned long pfn) * debug_dma_assert_idle() as the pfn may be marked idle * prematurely. */ - WARN_ONCE(overlap == 0, + WARN_ONCE(overlap > ACTIVE_PFN_MAX_OVERLAP, "DMA-API: exceeded %d overlapping mappings of pfn %lx\n", ACTIVE_PFN_MAX_OVERLAP, pfn); } @@ -517,7 +517,11 @@ static void active_pfn_remove(struct dma_debug_entry *entry) unsigned long flags; spin_lock_irqsave(&radix_lock, flags); - if (active_pfn_dec_overlap(entry->pfn) == 0) + /* since we are counting overlaps the final put of the + * entry->pfn will occur when the overlap count is 0. + * active_pfn_dec_overlap() returns -1 in that case + */ + if (active_pfn_dec_overlap(entry->pfn) < 0) radix_tree_delete(&dma_active_pfn, entry->pfn); spin_unlock_irqrestore(&radix_lock, flags); } -- cgit v1.2.3 From 8a10bc9d27ceb084b0d8be621a033a475eb9fdfd Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 31 Jan 2014 15:39:40 +0100 Subject: parisc/sti_console: prefer Linux fonts over built-in ROM fonts The built-in ROM fonts lack many necessary ASCII characters, which is why it makes sens to prefer the Linux fonts instead if they are available. This makes consoles on STI graphics cards which are not supported by the stifb driver (e.g. Visualize FXe) looks much nicer. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v3.13 --- lib/fonts/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/fonts/Kconfig b/lib/fonts/Kconfig index 34fd931b54b5..4dc1b990aa23 100644 --- a/lib/fonts/Kconfig +++ b/lib/fonts/Kconfig @@ -9,7 +9,7 @@ if FONT_SUPPORT config FONTS bool "Select compiled-in fonts" - depends on FRAMEBUFFER_CONSOLE || STI_CONSOLE + depends on FRAMEBUFFER_CONSOLE help Say Y here if you would like to use fonts other than the default your frame buffer console usually use. @@ -22,7 +22,7 @@ config FONTS config FONT_8x8 bool "VGA 8x8 font" if FONTS - depends on FRAMEBUFFER_CONSOLE || STI_CONSOLE + depends on FRAMEBUFFER_CONSOLE default y if !SPARC && !FONTS help This is the "high resolution" font for the VGA frame buffer (the one @@ -45,7 +45,7 @@ config FONT_8x16 config FONT_6x11 bool "Mac console 6x11 font (not supported by all drivers)" if FONTS - depends on FRAMEBUFFER_CONSOLE || STI_CONSOLE + depends on FRAMEBUFFER_CONSOLE default y if !SPARC && !FONTS && MAC help Small console font with Macintosh-style high-half glyphs. Some Mac -- cgit v1.2.3