From 95a05b428cc675694321c8f762591984f3fd2b1e Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:14:19 +0000 Subject: slab: Common constants for kmalloc boundaries Standardize the constants that describe the smallest and largest object kept in the kmalloc arrays for SLAB and SLUB. Differentiate between the maximum size for which a slab cache is used (KMALLOC_MAX_CACHE_SIZE) and the maximum allocatable size (KMALLOC_MAX_SIZE, KMALLOC_MAX_ORDER). Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'mm/slub.c') diff --git a/mm/slub.c b/mm/slub.c index ba2ca53f6c3a..d0f72ee06310 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2775,7 +2775,7 @@ init_kmem_cache_node(struct kmem_cache_node *n) static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) { BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < - SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu)); + KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu)); /* * Must align to double word boundary for the double cmpxchg @@ -3174,11 +3174,11 @@ int __kmem_cache_shutdown(struct kmem_cache *s) * Kmalloc subsystem *******************************************************************/ -struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; +struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; EXPORT_SYMBOL(kmalloc_caches); #ifdef CONFIG_ZONE_DMA -static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT]; +static struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; #endif static int __init setup_slub_min_order(char *str) @@ -3280,7 +3280,7 @@ void *__kmalloc(size_t size, gfp_t flags) struct kmem_cache *s; void *ret; - if (unlikely(size > SLUB_MAX_SIZE)) + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) return kmalloc_large(size, flags); s = get_slab(size, flags); @@ -3316,7 +3316,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) struct kmem_cache *s; void *ret; - if (unlikely(size > SLUB_MAX_SIZE)) { + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { ret = kmalloc_large_node(size, flags, node); trace_kmalloc_node(_RET_IP_, ret, @@ -3721,7 +3721,7 @@ void __init kmem_cache_init(void) caches++; } - for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { + for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0); caches++; } @@ -3739,7 +3739,7 @@ void __init kmem_cache_init(void) BUG_ON(!kmalloc_caches[2]->name); } - for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { + for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); BUG_ON(!s); @@ -3751,7 +3751,7 @@ void __init kmem_cache_init(void) #endif #ifdef CONFIG_ZONE_DMA - for (i = 0; i < SLUB_PAGE_SHIFT; i++) { + for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) { struct kmem_cache *s = kmalloc_caches[i]; if (s && s->size) { @@ -3930,7 +3930,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) struct kmem_cache *s; void *ret; - if (unlikely(size > SLUB_MAX_SIZE)) + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) return kmalloc_large(size, gfpflags); s = get_slab(size, gfpflags); @@ -3953,7 +3953,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, struct kmem_cache *s; void *ret; - if (unlikely(size > SLUB_MAX_SIZE)) { + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { ret = kmalloc_large_node(size, gfpflags, node); trace_kmalloc_node(caller, ret, @@ -4312,7 +4312,7 @@ static void 
resiliency_test(void) { u8 *p; - BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10); + BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10); printk(KERN_ERR "SLUB resiliency testing\n"); printk(KERN_ERR "-----------------------\n"); -- cgit v1.2.3 From 9425c58e5445277699ff3c2a87bac1cfebc1b48d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:12:17 +0000 Subject: slab: Common definition for the array of kmalloc caches Have a common definition of the kmalloc cache arrays in SLAB and SLUB Acked-by: Glauber Costa Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'mm/slub.c') diff --git a/mm/slub.c b/mm/slub.c index d0f72ee06310..527cbfb5c49b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3174,13 +3174,6 @@ int __kmem_cache_shutdown(struct kmem_cache *s) * Kmalloc subsystem *******************************************************************/ -struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; -EXPORT_SYMBOL(kmalloc_caches); - -#ifdef CONFIG_ZONE_DMA -static struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; -#endif - static int __init setup_slub_min_order(char *str) { get_option(&str, &slub_min_order); -- cgit v1.2.3 From f97d5f634d3b5133951424fae751db1f339548bd Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:12:17 +0000 Subject: slab: Common function to create the kmalloc array The kmalloc array is created in similar ways in both SLAB and SLUB. Create a common function and have both allocators call that function. V1->V2: Whitespace cleanup Reviewed-by: Glauber Costa Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 55 +++---------------------------------------------------- 1 file changed, 3 insertions(+), 52 deletions(-) (limited to 'mm/slub.c') diff --git a/mm/slub.c b/mm/slub.c index 527cbfb5c49b..e813c2d30fe0 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3633,7 +3633,6 @@ void __init kmem_cache_init(void) static __initdata struct kmem_cache boot_kmem_cache, boot_kmem_cache_node; int i; - int caches = 2; if (debug_guardpage_minorder()) slub_max_order = 0; @@ -3703,64 +3702,16 @@ void __init kmem_cache_init(void) size_index[size_index_elem(i)] = 8; } - /* Caches that are not of the two-to-the-power-of size */ - if (KMALLOC_MIN_SIZE <= 32) { - kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0); - caches++; - } - - if (KMALLOC_MIN_SIZE <= 64) { - kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0); - caches++; - } - - for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { - kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0); - caches++; - } - - slab_state = UP; - - /* Provide the correct kmalloc names now that the caches are up */ - if (KMALLOC_MIN_SIZE <= 32) { - kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT); - BUG_ON(!kmalloc_caches[1]->name); - } - - if (KMALLOC_MIN_SIZE <= 64) { - kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT); - BUG_ON(!kmalloc_caches[2]->name); - } - - for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { - char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); - - BUG_ON(!s); - kmalloc_caches[i]->name = s; - } + create_kmalloc_caches(0); #ifdef CONFIG_SMP register_cpu_notifier(&slab_notifier); #endif -#ifdef CONFIG_ZONE_DMA - for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) { - struct kmem_cache *s = kmalloc_caches[i]; - - if (s && s->size) { - char *name = kasprintf(GFP_NOWAIT, -
"dma-kmalloc-%d", s->object_size); - - BUG_ON(!name); - kmalloc_dma_caches[i] = create_kmalloc_cache(name, - s->object_size, SLAB_CACHE_DMA); - } - } -#endif printk(KERN_INFO - "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," + "SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d," " CPUs=%d, Nodes=%d\n", - caches, cache_line_size(), + cache_line_size(), slub_min_order, slub_max_order, slub_min_objects, nr_cpu_ids, nr_node_ids); } -- cgit v1.2.3 From 2c59dd6544212faa5ce761920d2251f4152f408d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:14:19 +0000 Subject: slab: Common Kmalloc cache determination Extract the optimized lookup functions from slub and put them into slab_common.c. Then make slab use these functions as well. Joonsoo notes that this fixes some issues with constant folding which also reduces the code size for slub. https://lkml.org/lkml/2012/10/20/82 Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 108 +++----------------------------------------------------------- 1 file changed, 5 insertions(+), 103 deletions(-) (limited to 'mm/slub.c') diff --git a/mm/slub.c b/mm/slub.c index e813c2d30fe0..6184b0821f7e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2982,7 +2982,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) s->allocflags |= __GFP_COMP; if (s->flags & SLAB_CACHE_DMA) - s->allocflags |= SLUB_DMA; + s->allocflags |= GFP_DMA; if (s->flags & SLAB_RECLAIM_ACCOUNT) s->allocflags |= __GFP_RECLAIMABLE; @@ -3210,64 +3210,6 @@ static int __init setup_slub_nomerge(char *str) __setup("slub_nomerge", setup_slub_nomerge); -/* - * Conversion table for small slabs sizes / 8 to the index in the - * kmalloc array. This is necessary for slabs < 192 since we have non power - * of two cache sizes there. The size of larger slabs can be determined using - * fls. 
- */ -static s8 size_index[24] = { - 3, /* 8 */ - 4, /* 16 */ - 5, /* 24 */ - 5, /* 32 */ - 6, /* 40 */ - 6, /* 48 */ - 6, /* 56 */ - 6, /* 64 */ - 1, /* 72 */ - 1, /* 80 */ - 1, /* 88 */ - 1, /* 96 */ - 7, /* 104 */ - 7, /* 112 */ - 7, /* 120 */ - 7, /* 128 */ - 2, /* 136 */ - 2, /* 144 */ - 2, /* 152 */ - 2, /* 160 */ - 2, /* 168 */ - 2, /* 176 */ - 2, /* 184 */ - 2 /* 192 */ -}; - -static inline int size_index_elem(size_t bytes) -{ - return (bytes - 1) / 8; -} - -static struct kmem_cache *get_slab(size_t size, gfp_t flags) -{ - int index; - - if (size <= 192) { - if (!size) - return ZERO_SIZE_PTR; - - index = size_index[size_index_elem(size)]; - } else - index = fls(size - 1); - -#ifdef CONFIG_ZONE_DMA - if (unlikely((flags & SLUB_DMA))) - return kmalloc_dma_caches[index]; - -#endif - return kmalloc_caches[index]; -} - void *__kmalloc(size_t size, gfp_t flags) { struct kmem_cache *s; @@ -3276,7 +3218,7 @@ void *__kmalloc(size_t size, gfp_t flags) if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) return kmalloc_large(size, flags); - s = get_slab(size, flags); + s = kmalloc_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(s))) return s; @@ -3319,7 +3261,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) return ret; } - s = get_slab(size, flags); + s = kmalloc_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(s))) return s; @@ -3632,7 +3574,6 @@ void __init kmem_cache_init(void) { static __initdata struct kmem_cache boot_kmem_cache, boot_kmem_cache_node; - int i; if (debug_guardpage_minorder()) slub_max_order = 0; @@ -3663,45 +3604,6 @@ void __init kmem_cache_init(void) kmem_cache_node = bootstrap(&boot_kmem_cache_node); /* Now we can use the kmem_cache to allocate kmalloc slabs */ - - /* - * Patch up the size_index table if we have strange large alignment - * requirements for the kmalloc array. This is only the case for - * MIPS it seems. The standard arches will not generate any code here. - * - * Largest permitted alignment is 256 bytes due to the way we - * handle the index determination for the smaller caches. - * - * Make sure that nothing crazy happens if someone starts tinkering - * around with ARCH_KMALLOC_MINALIGN - */ - BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || - (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); - - for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { - int elem = size_index_elem(i); - if (elem >= ARRAY_SIZE(size_index)) - break; - size_index[elem] = KMALLOC_SHIFT_LOW; - } - - if (KMALLOC_MIN_SIZE == 64) { - /* - * The 96 byte size cache is not used if the alignment - * is 64 byte. - */ - for (i = 64 + 8; i <= 96; i += 8) - size_index[size_index_elem(i)] = 7; - } else if (KMALLOC_MIN_SIZE == 128) { - /* - * The 192 byte sized cache is not used if the alignment - * is 128 byte. Redirect kmalloc to use the 256 byte cache - * instead. 
- */ - for (i = 128 + 8; i <= 192; i += 8) - size_index[size_index_elem(i)] = 8; - } - create_kmalloc_caches(0); #ifdef CONFIG_SMP @@ -3877,7 +3779,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) return kmalloc_large(size, gfpflags); - s = get_slab(size, gfpflags); + s = kmalloc_slab(size, gfpflags); if (unlikely(ZERO_OR_NULL_PTR(s))) return s; @@ -3907,7 +3809,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, return ret; } - s = get_slab(size, gfpflags); + s = kmalloc_slab(size, gfpflags); if (unlikely(ZERO_OR_NULL_PTR(s))) return s; -- cgit v1.2.3 From 7d557b3cb69398d83ceabad9cf147c93a3aa97fd Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 22 Feb 2013 20:20:00 +0400 Subject: slub: correctly bootstrap boot caches After we create a boot cache, we may allocate from it until it is bootstrapped. This will move the page from the partial list to the cpu slab list. If this happens, the loop: list_for_each_entry(p, &n->partial, lru) that we use to scan for all partial pages will yield nothing, and the pages will keep pointing to the boot cpu cache, which is of course, invalid. To fix that, we should flush the cache to make sure that the cpu slab is back to the partial list. Signed-off-by: Glauber Costa Reported-by: Steffen Michalke Tested-by: KAMEZAWA Hiroyuki Acked-by: Christoph Lameter Cc: Andrew Morton Cc: Tejun Heo Signed-off-by: Pekka Enberg --- mm/slub.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'mm/slub.c') diff --git a/mm/slub.c b/mm/slub.c index 6184b0821f7e..aa0728daf8bb 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3552,6 +3552,12 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache) memcpy(s, static_cache, kmem_cache->object_size); + /* + * This runs very early, and only the boot processor is supposed to be + * up. Even if it weren't true, IRQs are not up so we couldn't fire + * IPIs around. + */ + __flush_cpu_slab(s, smp_processor_id()); for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n = get_node(s, node); struct page *p; -- cgit v1.2.3 From 633b076464da52b3c7bf0f62932fbfc0ea23d8b3 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Mon, 21 Jan 2013 17:01:25 +0900 Subject: slub: correct to calculate num of acquired objects in get_partial_node() There is a subtle bug when calculating the number of acquired objects. Currently, we calculate "available = page->objects - page->inuse", after acquire_slab() is called in get_partial_node(). In acquire_slab() with mode = 1, we always set new.inuse = page->objects. So, acquire_slab(s, n, page, object == NULL); if (!object) { c->page = page; stat(s, ALLOC_FROM_PARTIAL); object = t; available = page->objects - page->inuse; !!! available is always 0 !!! ... Therefore, "available > s->cpu_partial / 2" is always false and we always go to the second iteration. This patch corrects this problem. After that, we don't need the return value of put_cpu_partial(). So remove it.
Reviewed-by: Wanpeng Li Acked-by: Christoph Lameter Signed-off-by: Joonsoo Kim Signed-off-by: Pekka Enberg --- mm/slub.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'mm/slub.c') diff --git a/mm/slub.c b/mm/slub.c index aa0728daf8bb..8f73593d4f21 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1493,7 +1493,7 @@ static inline void remove_partial(struct kmem_cache_node *n, */ static inline void *acquire_slab(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page, - int mode) + int mode, int *objects) { void *freelist; unsigned long counters; @@ -1507,6 +1507,7 @@ static inline void *acquire_slab(struct kmem_cache *s, freelist = page->freelist; counters = page->counters; new.counters = counters; + *objects = new.objects - new.inuse; if (mode) { new.inuse = page->objects; new.freelist = NULL; @@ -1528,7 +1529,7 @@ static inline void *acquire_slab(struct kmem_cache *s, return freelist; } -static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain); +static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain); static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags); /* @@ -1539,6 +1540,8 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, { struct page *page, *page2; void *object = NULL; + int available = 0; + int objects; /* * Racy check. If we mistakenly see no partial slabs then we @@ -1552,22 +1555,21 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, spin_lock(&n->list_lock); list_for_each_entry_safe(page, page2, &n->partial, lru) { void *t; - int available; if (!pfmemalloc_match(page, flags)) continue; - t = acquire_slab(s, n, page, object == NULL); + t = acquire_slab(s, n, page, object == NULL, &objects); if (!t) break; + available += objects; if (!object) { c->page = page; stat(s, ALLOC_FROM_PARTIAL); object = t; - available = page->objects - page->inuse; } else { - available = put_cpu_partial(s, page, 0); + put_cpu_partial(s, page, 0); stat(s, CPU_PARTIAL_NODE); } if (kmem_cache_debug(s) || available > s->cpu_partial / 2) @@ -1946,7 +1948,7 @@ static void unfreeze_partials(struct kmem_cache *s, * If we did not find a slot then simply move all the partials to the * per node partial list. */ -static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) +static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) { struct page *oldpage; int pages; @@ -1984,7 +1986,6 @@ static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) page->next = oldpage; } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage); - return pobjects; } static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) -- cgit v1.2.3 From 338b2642290ef3193229ece8cfc776ac4fe8869d Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Mon, 21 Jan 2013 17:01:27 +0900 Subject: slub: add 'likely' macro to inc_slabs_node() After the boot phase, 'n' always exists. So add the 'likely' macro to help the compiler. Acked-by: Christoph Lameter Signed-off-by: Joonsoo Kim Signed-off-by: Pekka Enberg --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/slub.c') diff --git a/mm/slub.c b/mm/slub.c index 8f73593d4f21..21b3f004f614 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1005,7 +1005,7 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects) * dilemma by deferring the increment of the count during * bootstrap (see early_kmem_cache_node_alloc).
*/ - if (n) { + if (likely(n)) { atomic_long_inc(&n->nr_slabs); atomic_long_add(objects, &n->total_objects); } -- cgit v1.2.3 From 4d7868e6475d478172581828021bd8a356726679 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 23 Jan 2013 21:45:47 +0000 Subject: slub: Do not dereference NULL pointer in node_match The variables accessed in slab_alloc are volatile and therefore the page pointer passed to node_match can be NULL. The processing of data in slab_alloc is tentative until either the cmpxchg succeeds or the __slab_alloc slowpath is invoked. Both are able to perform the same allocation from the freelist. Check for the NULL pointer in node_match. A false positive will lead to a retry of the loop in __slab_alloc. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/slub.c') diff --git a/mm/slub.c b/mm/slub.c index 21b3f004f614..8b1b99d399cb 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2042,7 +2042,7 @@ static void flush_all(struct kmem_cache *s) static inline int node_match(struct page *page, int node) { #ifdef CONFIG_NUMA - if (node != NUMA_NO_NODE && page_to_nid(page) != node) + if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node)) return 0; #endif return 1; -- cgit v1.2.3 From 7cccd80b4397699902aced1ad3d692d384aaab77 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 23 Jan 2013 21:45:48 +0000 Subject: slub: tid must be retrieved from the percpu area of the current processor As Steven Rostedt has pointed out: rescheduling could occur on a different processor after the determination of the per cpu pointer and before the tid is retrieved. This could result in allocation from the wrong node in slab_alloc(). The effect is much more severe in slab_free() where we could free to the freelist of the wrong page. The window for something like that occurring is pretty small but it is possible. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'mm/slub.c') diff --git a/mm/slub.c b/mm/slub.c index 8b1b99d399cb..4df2c0c337fb 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2332,13 +2332,18 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, s = memcg_kmem_get_cache(s, gfpflags); redo: - /* * Must read kmem_cache cpu data via this cpu ptr. Preemption is * enabled. We may switch back and forth between cpus while * reading from one cpu area. That does not matter as long * as we end up on the original cpu again when doing the cmpxchg. + * + * Preemption is disabled for the retrieval of the tid because that + * must occur from the current processor. We cannot allow rescheduling + * on a different processor between the determination of the pointer + * and the retrieval of the tid. */ + preempt_disable(); c = __this_cpu_ptr(s->cpu_slab); /* @@ -2348,7 +2353,7 @@ redo: * linked list in between. */ tid = c->tid; - barrier(); + preempt_enable(); object = c->freelist; page = c->page; @@ -2595,10 +2600,11 @@ redo: * data is retrieved via this pointer. If we are on the same cpu * during the cmpxchg then the free will succedd. */ + preempt_disable(); c = __this_cpu_ptr(s->cpu_slab); tid = c->tid; - barrier(); + preempt_enable(); if (likely(page == c->page)) { set_freepointer(s, object, c->freelist); -- cgit v1.2.3