From ce79ddc8e2376a9a93c7d42daf89bfcbb9187e62 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 23 Nov 2009 22:01:15 +0200 Subject: SLAB: Fix lockdep annotations for CPU hotplug As reported by Paul McKenney: I am seeing some lockdep complaints in rcutorture runs that include frequent CPU-hotplug operations. The tests are otherwise successful. My first thought was to send a patch that gave each array_cache structure's ->lock field its own struct lock_class_key, but you already have a init_lock_keys() that seems to be intended to deal with this. ------------------------------------------------------------------------ ============================================= [ INFO: possible recursive locking detected ] 2.6.32-rc4-autokern1 #1 --------------------------------------------- syslogd/2908 is trying to acquire lock: (&nc->lock){..-...}, at: [] .kmem_cache_free+0x118/0x2d4 but task is already holding lock: (&nc->lock){..-...}, at: [] .kfree+0x1f0/0x324 other info that might help us debug this: 3 locks held by syslogd/2908: #0: (&u->readlock){+.+.+.}, at: [] .unix_dgram_recvmsg+0x70/0x338 #1: (&nc->lock){..-...}, at: [] .kfree+0x1f0/0x324 #2: (&parent->list_lock){-.-...}, at: [] .__drain_alien_cache+0x50/0xb8 stack backtrace: Call Trace: [c0000000e8ccafc0] [c0000000000101e4] .show_stack+0x70/0x184 (unreliable) [c0000000e8ccb070] [c0000000000afebc] .validate_chain+0x6ec/0xf58 [c0000000e8ccb180] [c0000000000b0ff0] .__lock_acquire+0x8c8/0x974 [c0000000e8ccb280] [c0000000000b2290] .lock_acquire+0x140/0x18c [c0000000e8ccb350] [c000000000468df0] ._spin_lock+0x48/0x70 [c0000000e8ccb3e0] [c0000000001407f4] .kmem_cache_free+0x118/0x2d4 [c0000000e8ccb4a0] [c000000000140b90] .free_block+0x130/0x1a8 [c0000000e8ccb540] [c000000000140f94] .__drain_alien_cache+0x80/0xb8 [c0000000e8ccb5e0] [c0000000001411e0] .kfree+0x214/0x324 [c0000000e8ccb6a0] [c0000000003ca860] .skb_release_data+0xe8/0x104 [c0000000e8ccb730] [c0000000003ca2ec] .__kfree_skb+0x20/0xd4 [c0000000e8ccb7b0] [c0000000003cf2c8] .skb_free_datagram+0x1c/0x5c [c0000000e8ccb830] [c00000000045597c] .unix_dgram_recvmsg+0x2f4/0x338 [c0000000e8ccb920] [c0000000003c0f14] .sock_recvmsg+0xf4/0x13c [c0000000e8ccbb30] [c0000000003c28ec] .SyS_recvfrom+0xb4/0x130 [c0000000e8ccbcb0] [c0000000003bfb78] .sys_recv+0x18/0x2c [c0000000e8ccbd20] [c0000000003ed388] .compat_sys_recv+0x14/0x28 [c0000000e8ccbd90] [c0000000003ee1bc] .compat_sys_socketcall+0x178/0x220 [c0000000e8ccbe30] [c0000000000085d4] syscall_exit+0x0/0x40 This patch fixes the issue by setting up lockdep annotations during CPU hotplug. Reported-by: Paul E. McKenney Tested-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slab.c | 108 +++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 61 insertions(+), 47 deletions(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index 7dfa481c96ba..84de47e350dd 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -604,6 +604,26 @@ static struct kmem_cache cache_cache = { #define BAD_ALIEN_MAGIC 0x01020304ul +/* + * chicken and egg problem: delay the per-cpu array allocation + * until the general caches are up. + */ +static enum { + NONE, + PARTIAL_AC, + PARTIAL_L3, + EARLY, + FULL +} g_cpucache_up; + +/* + * used by boot code to determine if it can use slab based allocator + */ +int slab_is_available(void) +{ + return g_cpucache_up >= EARLY; +} + #ifdef CONFIG_LOCKDEP /* @@ -620,40 +640,52 @@ static struct kmem_cache cache_cache = { static struct lock_class_key on_slab_l3_key; static struct lock_class_key on_slab_alc_key; -static inline void init_lock_keys(void) - +static void init_node_lock_keys(int q) { - int q; struct cache_sizes *s = malloc_sizes; - while (s->cs_size != ULONG_MAX) { - for_each_node(q) { - struct array_cache **alc; - int r; - struct kmem_list3 *l3 = s->cs_cachep->nodelists[q]; - if (!l3 || OFF_SLAB(s->cs_cachep)) - continue; - lockdep_set_class(&l3->list_lock, &on_slab_l3_key); - alc = l3->alien; - /* - * FIXME: This check for BAD_ALIEN_MAGIC - * should go away when common slab code is taught to - * work even without alien caches. - * Currently, non NUMA code returns BAD_ALIEN_MAGIC - * for alloc_alien_cache, - */ - if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) - continue; - for_each_node(r) { - if (alc[r]) - lockdep_set_class(&alc[r]->lock, - &on_slab_alc_key); - } + if (g_cpucache_up != FULL) + return; + + for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { + struct array_cache **alc; + struct kmem_list3 *l3; + int r; + + l3 = s->cs_cachep->nodelists[q]; + if (!l3 || OFF_SLAB(s->cs_cachep)) + return; + lockdep_set_class(&l3->list_lock, &on_slab_l3_key); + alc = l3->alien; + /* + * FIXME: This check for BAD_ALIEN_MAGIC + * should go away when common slab code is taught to + * work even without alien caches. + * Currently, non NUMA code returns BAD_ALIEN_MAGIC + * for alloc_alien_cache, + */ + if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) + return; + for_each_node(r) { + if (alc[r]) + lockdep_set_class(&alc[r]->lock, + &on_slab_alc_key); } - s++; } } + +static inline void init_lock_keys(void) +{ + int node; + + for_each_node(node) + init_node_lock_keys(node); +} #else +static void init_node_lock_keys(int q) +{ +} + static inline void init_lock_keys(void) { } @@ -665,26 +697,6 @@ static inline void init_lock_keys(void) static DEFINE_MUTEX(cache_chain_mutex); static struct list_head cache_chain; -/* - * chicken and egg problem: delay the per-cpu array allocation - * until the general caches are up. - */ -static enum { - NONE, - PARTIAL_AC, - PARTIAL_L3, - EARLY, - FULL -} g_cpucache_up; - -/* - * used by boot code to determine if it can use slab based allocator - */ -int slab_is_available(void) -{ - return g_cpucache_up >= EARLY; -} - static DEFINE_PER_CPU(struct delayed_work, reap_work); static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) @@ -1254,6 +1266,8 @@ static int __cpuinit cpuup_prepare(long cpu) kfree(shared); free_alien_cache(alien); } + init_node_lock_keys(node); + return 0; bad: cpuup_canceled(cpu); -- cgit v1.2.3 From 8e15b79cf4bd20c6afb4663d98a39cd004eee672 Mon Sep 17 00:00:00 2001 From: Tim Blechmann Date: Mon, 30 Nov 2009 18:59:34 +0100 Subject: SLAB: Fix unlikely() annotation in __cache_alloc_node() Branch profiling on my nehalem machine showed 99% incorrect branch hints: 28459 7678524 99 __cache_alloc_node slab.c 3551 Discussion on lkml [1] led to the solution to remove this hint. [1] http://patchwork.kernel.org/patch/63517/ Signed-off-by: Tim Blechmann Signed-off-by: Pekka Enberg --- mm/slab.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index 84de47e350dd..a07540e5843b 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3320,7 +3320,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, cache_alloc_debugcheck_before(cachep, flags); local_irq_save(save_flags); - if (unlikely(nodeid == -1)) + if (nodeid == -1) nodeid = numa_node_id(); if (unlikely(!cachep->nodelists[nodeid])) { -- cgit v1.2.3 From f3d8b53a3abbfd0b74fa5dfaa690870d9619fad9 Mon Sep 17 00:00:00 2001 From: "J. R. Okajima" Date: Wed, 2 Dec 2009 16:55:49 +0900 Subject: slab, kmemleak: stop calling kmemleak_erase() unconditionally When the gotten object is NULL (probably due to ENOMEM), kmemleak_erase() is unnecessary here, It just sets NULL to where already is NULL. Add a condition. Acked-by: Catalin Marinas Signed-off-by: J. R. Okajima Signed-off-by: Pekka Enberg --- mm/slab.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index 7dfa481c96ba..4e61449d7946 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3109,7 +3109,8 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) * per-CPU caches is leaked, we need to make sure kmemleak doesn't * treat the array pointers as a reference to the object. */ - kmemleak_erase(&ac->entry[ac->avail]); + if (objp) + kmemleak_erase(&ac->entry[ac->avail]); return objp; } -- cgit v1.2.3 From ddbf2e8366f2a7fa3419be418cfd83a914d2527f Mon Sep 17 00:00:00 2001 From: "J. R. Okajima" Date: Wed, 2 Dec 2009 16:55:50 +0900 Subject: slab, kmemleak: pass the correct pointer to kmemleak_erase() In ____cache_alloc(), the variable 'ac' may be changed after cache_alloc_refill() and the following kmemleak_erase() may get an incorrect pointer. Update 'ac' after cache_alloc_refill() unconditionally. See the following URL for the discussion of this patch: http://marc.info/?l=linux-kernel&m=125873373124187&w=2 Acked-by: Catalin Marinas Signed-off-by: J. R. Okajima Signed-off-by: Pekka Enberg --- mm/slab.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'mm/slab.c') diff --git a/mm/slab.c b/mm/slab.c index 4e61449d7946..66e90477a4bb 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3103,6 +3103,11 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) } else { STATS_INC_ALLOCMISS(cachep); objp = cache_alloc_refill(cachep, flags); + /* + * the 'ac' may be updated by cache_alloc_refill(), + * and kmemleak_erase() requires its correct value. + */ + ac = cpu_cache_get(cachep); } /* * To avoid a false negative, if an object that is in one of the -- cgit v1.2.3