summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig2
-rw-r--r--mm/filemap.c2
-rw-r--r--mm/memcontrol.c73
-rw-r--r--mm/memory.c58
-rw-r--r--mm/mmu_context.c2
-rw-r--r--mm/page_alloc.c138
-rw-r--r--mm/page_cgroup.c1
-rw-r--r--mm/slab.c266
-rw-r--r--mm/swap.c33
-rw-r--r--mm/vmalloc.c14
-rw-r--r--mm/vmscan.c18
-rw-r--r--mm/vmstat.c6
12 files changed, 428 insertions, 185 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 011b110365c8..0526445c9b35 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -307,7 +307,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
config TRANSPARENT_HUGEPAGE
bool "Transparent Hugepage Support"
- depends on X86 && MMU
+ depends on X86 && MMU && !PREEMPT_RT_FULL
select COMPACTION
help
Transparent Hugepages allows the kernel to use huge pages and
diff --git a/mm/filemap.c b/mm/filemap.c
index 03c5b0ec2b78..4a30d36ca4c8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2044,7 +2044,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
char *kaddr;
size_t copied;
- BUG_ON(!in_atomic());
+ BUG_ON(!pagefault_disabled());
kaddr = kmap_atomic(page, KM_USER0);
if (likely(i->nr_segs == 1)) {
int left;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f538e9b824eb..f9b886db58b5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -683,37 +683,32 @@ static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg,
return total;
}
-static bool __memcg_event_check(struct mem_cgroup *memcg, int target)
+static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
+ enum mem_cgroup_events_target target)
{
unsigned long val, next;
val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
next = __this_cpu_read(memcg->stat->targets[target]);
/* from time_after() in jiffies.h */
- return ((long)next - (long)val < 0);
-}
-
-static void __mem_cgroup_target_update(struct mem_cgroup *memcg, int target)
-{
- unsigned long val, next;
-
- val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
-
- switch (target) {
- case MEM_CGROUP_TARGET_THRESH:
- next = val + THRESHOLDS_EVENTS_TARGET;
- break;
- case MEM_CGROUP_TARGET_SOFTLIMIT:
- next = val + SOFTLIMIT_EVENTS_TARGET;
- break;
- case MEM_CGROUP_TARGET_NUMAINFO:
- next = val + NUMAINFO_EVENTS_TARGET;
- break;
- default:
- return;
+ if ((long)next - (long)val < 0) {
+ switch (target) {
+ case MEM_CGROUP_TARGET_THRESH:
+ next = val + THRESHOLDS_EVENTS_TARGET;
+ break;
+ case MEM_CGROUP_TARGET_SOFTLIMIT:
+ next = val + SOFTLIMIT_EVENTS_TARGET;
+ break;
+ case MEM_CGROUP_TARGET_NUMAINFO:
+ next = val + NUMAINFO_EVENTS_TARGET;
+ break;
+ default:
+ break;
+ }
+ __this_cpu_write(memcg->stat->targets[target], next);
+ return true;
}
-
- __this_cpu_write(memcg->stat->targets[target], next);
+ return false;
}
/*
@@ -724,25 +719,27 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
{
preempt_disable();
/* threshold event is triggered in finer grain than soft limit */
- if (unlikely(__memcg_event_check(memcg, MEM_CGROUP_TARGET_THRESH))) {
+ if (unlikely(mem_cgroup_event_ratelimit(memcg,
+ MEM_CGROUP_TARGET_THRESH))) {
+ bool do_softlimit, do_numainfo;
+
+ do_softlimit = mem_cgroup_event_ratelimit(memcg,
+ MEM_CGROUP_TARGET_SOFTLIMIT);
+#if MAX_NUMNODES > 1
+ do_numainfo = mem_cgroup_event_ratelimit(memcg,
+ MEM_CGROUP_TARGET_NUMAINFO);
+#endif
+ preempt_enable();
+
mem_cgroup_threshold(memcg);
- __mem_cgroup_target_update(memcg, MEM_CGROUP_TARGET_THRESH);
- if (unlikely(__memcg_event_check(memcg,
- MEM_CGROUP_TARGET_SOFTLIMIT))) {
+ if (unlikely(do_softlimit))
mem_cgroup_update_tree(memcg, page);
- __mem_cgroup_target_update(memcg,
- MEM_CGROUP_TARGET_SOFTLIMIT);
- }
#if MAX_NUMNODES > 1
- if (unlikely(__memcg_event_check(memcg,
- MEM_CGROUP_TARGET_NUMAINFO))) {
+ if (unlikely(do_numainfo))
atomic_inc(&memcg->numainfo_events);
- __mem_cgroup_target_update(memcg,
- MEM_CGROUP_TARGET_NUMAINFO);
- }
#endif
- }
- preempt_enable();
+ } else
+ preempt_enable();
}
static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
diff --git a/mm/memory.c b/mm/memory.c
index 829d43735402..61744e9432cf 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3436,6 +3436,32 @@ unlock:
return 0;
}
+#ifdef CONFIG_PREEMPT_RT_FULL
+void pagefault_disable(void)
+{
+ migrate_disable();
+ current->pagefault_disabled++;
+ /*
+ * make sure to have issued the store before a pagefault
+ * can hit.
+ */
+ barrier();
+}
+EXPORT_SYMBOL_GPL(pagefault_disable);
+
+void pagefault_enable(void)
+{
+ /*
+ * make sure to issue those last loads/stores before enabling
+ * the pagefault handler again.
+ */
+ barrier();
+ current->pagefault_disabled--;
+ migrate_enable();
+}
+EXPORT_SYMBOL_GPL(pagefault_enable);
+#endif
+
/*
* By the time we get here, we already hold the mm semaphore
*/
@@ -3984,3 +4010,35 @@ void copy_user_huge_page(struct page *dst, struct page *src,
}
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
+
+#if defined(CONFIG_PREEMPT_RT_FULL) && (USE_SPLIT_PTLOCKS > 0)
+/*
+ * Heinous hack, relies on the caller doing something like:
+ *
+ * pte = alloc_pages(PGALLOC_GFP, 0);
+ * if (pte)
+ * pgtable_page_ctor(pte);
+ * return pte;
+ *
+ * This ensures we release the page and return NULL when the
+ * lock allocation fails.
+ */
+struct page *pte_lock_init(struct page *page)
+{
+ page->ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
+ if (page->ptl) {
+ spin_lock_init(__pte_lockptr(page));
+ } else {
+ __free_page(page);
+ page = NULL;
+ }
+ return page;
+}
+
+void pte_lock_deinit(struct page *page)
+{
+ kfree(page->ptl);
+ page->mapping = NULL;
+}
+
+#endif
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index cf332bc0080a..64ce279a984d 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -26,6 +26,7 @@ void use_mm(struct mm_struct *mm)
struct task_struct *tsk = current;
task_lock(tsk);
+ local_irq_disable_rt();
active_mm = tsk->active_mm;
if (active_mm != mm) {
atomic_inc(&mm->mm_count);
@@ -33,6 +34,7 @@ void use_mm(struct mm_struct *mm)
}
tsk->mm = mm;
switch_mm(active_mm, mm, tsk);
+ local_irq_enable_rt();
task_unlock(tsk);
if (active_mm != mm)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 485be8976bba..49675c704a5b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -57,6 +57,7 @@
#include <linux/ftrace_event.h>
#include <linux/memcontrol.h>
#include <linux/prefetch.h>
+#include <linux/locallock.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -222,6 +223,18 @@ EXPORT_SYMBOL(nr_node_ids);
EXPORT_SYMBOL(nr_online_nodes);
#endif
+static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
+
+#ifdef CONFIG_PREEMPT_RT_BASE
+# define cpu_lock_irqsave(cpu, flags) \
+ spin_lock_irqsave(&per_cpu(pa_lock, cpu).lock, flags)
+# define cpu_unlock_irqrestore(cpu, flags) \
+ spin_unlock_irqrestore(&per_cpu(pa_lock, cpu).lock, flags)
+#else
+# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags)
+# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags)
+#endif
+
int page_group_by_mobility_disabled __read_mostly;
static void set_pageblock_migratetype(struct page *page, int migratetype)
@@ -581,7 +594,7 @@ static inline int free_pages_check(struct page *page)
}
/*
- * Frees a number of pages from the PCP lists
+ * Frees a number of pages which have been collected from the pcp lists.
* Assumes all pages on list are in same zone, and of same order.
* count is the number of pages to free.
*
@@ -592,16 +605,42 @@ static inline int free_pages_check(struct page *page)
* pinned" detection logic.
*/
static void free_pcppages_bulk(struct zone *zone, int count,
- struct per_cpu_pages *pcp)
+ struct list_head *list)
{
- int migratetype = 0;
- int batch_free = 0;
int to_free = count;
+ unsigned long flags;
- spin_lock(&zone->lock);
+ spin_lock_irqsave(&zone->lock, flags);
zone->all_unreclaimable = 0;
zone->pages_scanned = 0;
+ while (!list_empty(list)) {
+ struct page *page = list_first_entry(list, struct page, lru);
+
+ /* must delete as __free_one_page list manipulates */
+ list_del(&page->lru);
+ /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
+ __free_one_page(page, zone, 0, page_private(page));
+ trace_mm_page_pcpu_drain(page, 0, page_private(page));
+ to_free--;
+ }
+ WARN_ON(to_free != 0);
+ __mod_zone_page_state(zone, NR_FREE_PAGES, count);
+ spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+/*
+ * Moves a number of pages from the PCP lists to free list which
+ * is freed outside of the locked region.
+ *
+ * Assumes all pages on list are in same zone, and of same order.
+ * count is the number of pages to free.
+ */
+static void isolate_pcp_pages(int to_free, struct per_cpu_pages *src,
+ struct list_head *dst)
+{
+ int migratetype = 0, batch_free = 0;
+
while (to_free) {
struct page *page;
struct list_head *list;
@@ -617,7 +656,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
batch_free++;
if (++migratetype == MIGRATE_PCPTYPES)
migratetype = 0;
- list = &pcp->lists[migratetype];
+ list = &src->lists[migratetype];
} while (list_empty(list));
/* This is the only non-empty list. Free them all. */
@@ -625,28 +664,25 @@ static void free_pcppages_bulk(struct zone *zone, int count,
batch_free = to_free;
do {
- page = list_entry(list->prev, struct page, lru);
- /* must delete as __free_one_page list manipulates */
+ page = list_last_entry(list, struct page, lru);
list_del(&page->lru);
- /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
- __free_one_page(page, zone, 0, page_private(page));
- trace_mm_page_pcpu_drain(page, 0, page_private(page));
+ list_add(&page->lru, dst);
} while (--to_free && --batch_free && !list_empty(list));
}
- __mod_zone_page_state(zone, NR_FREE_PAGES, count);
- spin_unlock(&zone->lock);
}
static void free_one_page(struct zone *zone, struct page *page, int order,
int migratetype)
{
- spin_lock(&zone->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&zone->lock, flags);
zone->all_unreclaimable = 0;
zone->pages_scanned = 0;
__free_one_page(page, zone, order, migratetype);
__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
- spin_unlock(&zone->lock);
+ spin_unlock_irqrestore(&zone->lock, flags);
}
static bool free_pages_prepare(struct page *page, unsigned int order)
@@ -683,13 +719,13 @@ static void __free_pages_ok(struct page *page, unsigned int order)
if (!free_pages_prepare(page, order))
return;
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
if (unlikely(wasMlocked))
free_page_mlock(page);
__count_vm_events(PGFREE, 1 << order);
free_one_page(page_zone(page), page, order,
get_pageblock_migratetype(page));
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
}
/*
@@ -1065,16 +1101,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
{
unsigned long flags;
+ LIST_HEAD(dst);
int to_drain;
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
if (pcp->count >= pcp->batch)
to_drain = pcp->batch;
else
to_drain = pcp->count;
- free_pcppages_bulk(zone, to_drain, pcp);
+ isolate_pcp_pages(to_drain, pcp, &dst);
pcp->count -= to_drain;
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
+ free_pcppages_bulk(zone, to_drain, &dst);
}
#endif
@@ -1093,16 +1131,21 @@ static void drain_pages(unsigned int cpu)
for_each_populated_zone(zone) {
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
+ LIST_HEAD(dst);
+ int count;
- local_irq_save(flags);
+ cpu_lock_irqsave(cpu, flags);
pset = per_cpu_ptr(zone->pageset, cpu);
pcp = &pset->pcp;
- if (pcp->count) {
- free_pcppages_bulk(zone, pcp->count, pcp);
+ count = pcp->count;
+ if (count) {
+ isolate_pcp_pages(count, pcp, &dst);
pcp->count = 0;
}
- local_irq_restore(flags);
+ cpu_unlock_irqrestore(cpu, flags);
+ if (count)
+ free_pcppages_bulk(zone, count, &dst);
}
}
@@ -1119,7 +1162,14 @@ void drain_local_pages(void *arg)
*/
void drain_all_pages(void)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
on_each_cpu(drain_local_pages, NULL, 1);
+#else
+ int i;
+
+ for_each_online_cpu(i)
+ drain_pages(i);
+#endif
}
#ifdef CONFIG_HIBERNATION
@@ -1175,7 +1225,7 @@ void free_hot_cold_page(struct page *page, int cold)
migratetype = get_pageblock_migratetype(page);
set_page_private(page, migratetype);
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
if (unlikely(wasMlocked))
free_page_mlock(page);
__count_vm_event(PGFREE);
@@ -1202,12 +1252,19 @@ void free_hot_cold_page(struct page *page, int cold)
list_add(&page->lru, &pcp->lists[migratetype]);
pcp->count++;
if (pcp->count >= pcp->high) {
- free_pcppages_bulk(zone, pcp->batch, pcp);
+ LIST_HEAD(dst);
+ int count;
+
+ isolate_pcp_pages(pcp->batch, pcp, &dst);
pcp->count -= pcp->batch;
+ count = pcp->batch;
+ local_unlock_irqrestore(pa_lock, flags);
+ free_pcppages_bulk(zone, count, &dst);
+ return;
}
out:
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
}
/*
@@ -1302,7 +1359,7 @@ again:
struct per_cpu_pages *pcp;
struct list_head *list;
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list = &pcp->lists[migratetype];
if (list_empty(list)) {
@@ -1334,17 +1391,19 @@ again:
*/
WARN_ON_ONCE(order > 1);
}
- spin_lock_irqsave(&zone->lock, flags);
+ local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
page = __rmqueue(zone, order, migratetype);
- spin_unlock(&zone->lock);
- if (!page)
+ if (!page) {
+ spin_unlock(&zone->lock);
goto failed;
+ }
__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
+ spin_unlock(&zone->lock);
}
__count_zone_vm_events(PGALLOC, zone, 1 << order);
zone_statistics(preferred_zone, zone, gfp_flags);
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
VM_BUG_ON(bad_range(zone, page));
if (prep_new_page(page, order, gfp_flags))
@@ -1352,7 +1411,7 @@ again:
return page;
failed:
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
return NULL;
}
@@ -1901,8 +1960,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
if (*did_some_progress != COMPACT_SKIPPED) {
/* Page migration frees to the PCP lists but we want merging */
- drain_pages(get_cpu());
- put_cpu();
+ drain_pages(get_cpu_light());
+ put_cpu_light();
page = get_page_from_freelist(gfp_mask, nodemask,
order, zonelist, high_zoneidx,
@@ -3680,14 +3739,16 @@ static int __zone_pcp_update(void *data)
for_each_possible_cpu(cpu) {
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
+ LIST_HEAD(dst);
pset = per_cpu_ptr(zone->pageset, cpu);
pcp = &pset->pcp;
- local_irq_save(flags);
- free_pcppages_bulk(zone, pcp->count, pcp);
+ cpu_lock_irqsave(cpu, flags);
+ isolate_pcp_pages(pcp->count, pcp, &dst);
+ free_pcppages_bulk(zone, pcp->count, &dst);
setup_pageset(pset, batch);
- local_irq_restore(flags);
+ cpu_unlock_irqrestore(cpu, flags);
}
return 0;
}
@@ -5053,6 +5114,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
void __init page_alloc_init(void)
{
hotcpu_notifier(page_alloc_cpu_notify, 0);
+ local_irq_lock_init(pa_lock);
}
/*
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 2d123f94a8df..2e0d18d8e432 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -17,6 +17,7 @@ static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
set_page_cgroup_array_id(pc, id);
pc->mem_cgroup = NULL;
INIT_LIST_HEAD(&pc->lru);
+ page_cgroup_lock_init(pc);
}
static unsigned long total_usage;
diff --git a/mm/slab.c b/mm/slab.c
index 83311c9aaf9d..5f0c5ef303d4 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -116,6 +116,7 @@
#include <linux/kmemcheck.h>
#include <linux/memory.h>
#include <linux/prefetch.h>
+#include <linux/locallock.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
@@ -607,6 +608,12 @@ int slab_is_available(void)
return g_cpucache_up >= EARLY;
}
+/*
+ * Guard access to the cache-chain.
+ */
+static DEFINE_MUTEX(cache_chain_mutex);
+static struct list_head cache_chain;
+
#ifdef CONFIG_LOCKDEP
/*
@@ -668,38 +675,41 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
slab_set_debugobj_lock_classes_node(cachep, node);
}
-static void init_node_lock_keys(int q)
+static void init_lock_keys(struct kmem_cache *cachep, int node)
{
- struct cache_sizes *s = malloc_sizes;
+ struct kmem_list3 *l3;
if (g_cpucache_up < LATE)
return;
- for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
- struct kmem_list3 *l3;
+ l3 = cachep->nodelists[node];
+ if (!l3 || OFF_SLAB(cachep))
+ return;
- l3 = s->cs_cachep->nodelists[q];
- if (!l3 || OFF_SLAB(s->cs_cachep))
- continue;
+ slab_set_lock_classes(cachep, &on_slab_l3_key, &on_slab_alc_key, node);
+}
- slab_set_lock_classes(s->cs_cachep, &on_slab_l3_key,
- &on_slab_alc_key, q);
- }
+static void init_node_lock_keys(int node)
+{
+ struct kmem_cache *cachep;
+
+ list_for_each_entry(cachep, &cache_chain, next)
+ init_lock_keys(cachep, node);
}
-static inline void init_lock_keys(void)
+static inline void init_cachep_lock_keys(struct kmem_cache *cachep)
{
int node;
for_each_node(node)
- init_node_lock_keys(node);
+ init_lock_keys(cachep, node);
}
#else
-static void init_node_lock_keys(int q)
+static void init_node_lock_keys(int node)
{
}
-static inline void init_lock_keys(void)
+static void init_cachep_lock_keys(struct kmem_cache *cachep)
{
}
@@ -712,19 +722,67 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
}
#endif
+static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
+static DEFINE_PER_CPU(struct list_head, slab_free_list);
+static DEFINE_LOCAL_IRQ_LOCK(slab_lock);
+
+#ifndef CONFIG_PREEMPT_RT_BASE
+# define slab_on_each_cpu(func, cp) on_each_cpu(func, cp, 1)
+#else
/*
- * Guard access to the cache-chain.
+ * execute func() for all CPUs. On PREEMPT_RT we dont actually have
+ * to run on the remote CPUs - we only have to take their CPU-locks.
+ * (This is a rare operation, so cacheline bouncing is not an issue.)
*/
-static DEFINE_MUTEX(cache_chain_mutex);
-static struct list_head cache_chain;
+static void
+slab_on_each_cpu(void (*func)(void *arg, int this_cpu), void *arg)
+{
+ unsigned int i;
-static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
+ for_each_online_cpu(i)
+ func(arg, i);
+}
+#endif
+
+static void free_delayed(struct list_head *h)
+{
+ while(!list_empty(h)) {
+ struct page *page = list_first_entry(h, struct page, lru);
+
+ list_del(&page->lru);
+ __free_pages(page, page->index);
+ }
+}
+
+static void unlock_l3_and_free_delayed(spinlock_t *list_lock)
+{
+ LIST_HEAD(tmp);
+
+ list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
+ local_spin_unlock_irq(slab_lock, list_lock);
+ free_delayed(&tmp);
+}
+
+static void unlock_slab_and_free_delayed(unsigned long flags)
+{
+ LIST_HEAD(tmp);
+
+ list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
+ local_unlock_irqrestore(slab_lock, flags);
+ free_delayed(&tmp);
+}
static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
return cachep->array[smp_processor_id()];
}
+static inline struct array_cache *cpu_cache_get_on_cpu(struct kmem_cache *cachep,
+ int cpu)
+{
+ return cachep->array[cpu];
+}
+
static inline struct kmem_cache *__find_general_cachep(size_t size,
gfp_t gfpflags)
{
@@ -1062,9 +1120,10 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
if (l3->alien) {
struct array_cache *ac = l3->alien[node];
- if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
+ if (ac && ac->avail &&
+ local_spin_trylock_irq(slab_lock, &ac->lock)) {
__drain_alien_cache(cachep, ac, node);
- spin_unlock_irq(&ac->lock);
+ local_spin_unlock_irq(slab_lock, &ac->lock);
}
}
}
@@ -1079,9 +1138,9 @@ static void drain_alien_cache(struct kmem_cache *cachep,
for_each_online_node(i) {
ac = alien[i];
if (ac) {
- spin_lock_irqsave(&ac->lock, flags);
+ local_spin_lock_irqsave(slab_lock, &ac->lock, flags);
__drain_alien_cache(cachep, ac, i);
- spin_unlock_irqrestore(&ac->lock, flags);
+ local_spin_unlock_irqrestore(slab_lock, &ac->lock, flags);
}
}
}
@@ -1160,11 +1219,11 @@ static int init_cache_nodelists_node(int node)
cachep->nodelists[node] = l3;
}
- spin_lock_irq(&cachep->nodelists[node]->list_lock);
+ local_spin_lock_irq(slab_lock, &cachep->nodelists[node]->list_lock);
cachep->nodelists[node]->free_limit =
(1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
- spin_unlock_irq(&cachep->nodelists[node]->list_lock);
+ local_spin_unlock_irq(slab_lock, &cachep->nodelists[node]->list_lock);
}
return 0;
}
@@ -1189,7 +1248,7 @@ static void __cpuinit cpuup_canceled(long cpu)
if (!l3)
goto free_array_cache;
- spin_lock_irq(&l3->list_lock);
+ local_spin_lock_irq(slab_lock, &l3->list_lock);
/* Free limit for this kmem_list3 */
l3->free_limit -= cachep->batchcount;
@@ -1197,7 +1256,7 @@ static void __cpuinit cpuup_canceled(long cpu)
free_block(cachep, nc->entry, nc->avail, node);
if (!cpumask_empty(mask)) {
- spin_unlock_irq(&l3->list_lock);
+ unlock_l3_and_free_delayed(&l3->list_lock);
goto free_array_cache;
}
@@ -1211,7 +1270,7 @@ static void __cpuinit cpuup_canceled(long cpu)
alien = l3->alien;
l3->alien = NULL;
- spin_unlock_irq(&l3->list_lock);
+ unlock_l3_and_free_delayed(&l3->list_lock);
kfree(shared);
if (alien) {
@@ -1285,7 +1344,7 @@ static int __cpuinit cpuup_prepare(long cpu)
l3 = cachep->nodelists[node];
BUG_ON(!l3);
- spin_lock_irq(&l3->list_lock);
+ local_spin_lock_irq(slab_lock, &l3->list_lock);
if (!l3->shared) {
/*
* We are serialised from CPU_DEAD or
@@ -1300,7 +1359,7 @@ static int __cpuinit cpuup_prepare(long cpu)
alien = NULL;
}
#endif
- spin_unlock_irq(&l3->list_lock);
+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
kfree(shared);
free_alien_cache(alien);
if (cachep->flags & SLAB_DEBUG_OBJECTS)
@@ -1491,6 +1550,10 @@ void __init kmem_cache_init(void)
if (num_possible_nodes() == 1)
use_alien_caches = 0;
+ local_irq_lock_init(slab_lock);
+ for_each_possible_cpu(i)
+ INIT_LIST_HEAD(&per_cpu(slab_free_list, i));
+
for (i = 0; i < NUM_INIT_LISTS; i++) {
kmem_list3_init(&initkmem_list3[i]);
if (i < MAX_NUMNODES)
@@ -1669,14 +1732,13 @@ void __init kmem_cache_init_late(void)
g_cpucache_up = LATE;
- /* Annotate slab for lockdep -- annotate the malloc caches */
- init_lock_keys();
-
/* 6) resize the head arrays to their final sizes */
mutex_lock(&cache_chain_mutex);
- list_for_each_entry(cachep, &cache_chain, next)
+ list_for_each_entry(cachep, &cache_chain, next) {
+ init_cachep_lock_keys(cachep);
if (enable_cpucache(cachep, GFP_NOWAIT))
BUG();
+ }
mutex_unlock(&cache_chain_mutex);
/* Done! */
@@ -1769,12 +1831,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
/*
* Interface to system's page release.
*/
-static void kmem_freepages(struct kmem_cache *cachep, void *addr)
+static void kmem_freepages(struct kmem_cache *cachep, void *addr, bool delayed)
{
unsigned long i = (1 << cachep->gfporder);
- struct page *page = virt_to_page(addr);
+ struct page *page, *basepage = virt_to_page(addr);
const unsigned long nr_freed = i;
+ page = basepage;
+
kmemcheck_free_shadow(page, cachep->gfporder);
if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
@@ -1790,7 +1854,13 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
}
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += nr_freed;
- free_pages((unsigned long)addr, cachep->gfporder);
+
+ if (!delayed) {
+ free_pages((unsigned long)addr, cachep->gfporder);
+ } else {
+ basepage->index = cachep->gfporder;
+ list_add(&basepage->lru, &__get_cpu_var(slab_free_list));
+ }
}
static void kmem_rcu_free(struct rcu_head *head)
@@ -1798,7 +1868,7 @@ static void kmem_rcu_free(struct rcu_head *head)
struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
struct kmem_cache *cachep = slab_rcu->cachep;
- kmem_freepages(cachep, slab_rcu->addr);
+ kmem_freepages(cachep, slab_rcu->addr, false);
if (OFF_SLAB(cachep))
kmem_cache_free(cachep->slabp_cache, slab_rcu);
}
@@ -2017,7 +2087,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
* Before calling the slab must have been unlinked from the cache. The
* cache-lock is not held/needed.
*/
-static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp,
+ bool delayed)
{
void *addr = slabp->s_mem - slabp->colouroff;
@@ -2030,7 +2101,7 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
slab_rcu->addr = addr;
call_rcu(&slab_rcu->head, kmem_rcu_free);
} else {
- kmem_freepages(cachep, addr);
+ kmem_freepages(cachep, addr, delayed);
if (OFF_SLAB(cachep))
kmem_cache_free(cachep->slabp_cache, slabp);
}
@@ -2479,6 +2550,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
slab_set_debugobj_lock_classes(cachep);
}
+ init_cachep_lock_keys(cachep);
+
/* cache setup completed, link it into the list */
list_add(&cachep->next, &cache_chain);
oops:
@@ -2496,7 +2569,7 @@ EXPORT_SYMBOL(kmem_cache_create);
#if DEBUG
static void check_irq_off(void)
{
- BUG_ON(!irqs_disabled());
+ BUG_ON_NONRT(!irqs_disabled());
}
static void check_irq_on(void)
@@ -2531,26 +2604,43 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
struct array_cache *ac,
int force, int node);
-static void do_drain(void *arg)
+static void __do_drain(void *arg, unsigned int cpu)
{
struct kmem_cache *cachep = arg;
struct array_cache *ac;
- int node = numa_mem_id();
+ int node = cpu_to_mem(cpu);
- check_irq_off();
- ac = cpu_cache_get(cachep);
+ ac = cpu_cache_get_on_cpu(cachep, cpu);
spin_lock(&cachep->nodelists[node]->list_lock);
free_block(cachep, ac->entry, ac->avail, node);
spin_unlock(&cachep->nodelists[node]->list_lock);
ac->avail = 0;
}
+#ifndef CONFIG_PREEMPT_RT_BASE
+static void do_drain(void *arg)
+{
+ __do_drain(arg, smp_processor_id());
+}
+#else
+static void do_drain(void *arg, int cpu)
+{
+ LIST_HEAD(tmp);
+
+ spin_lock_irq(&per_cpu(slab_lock, cpu).lock);
+ __do_drain(arg, cpu);
+ list_splice_init(&per_cpu(slab_free_list, cpu), &tmp);
+ spin_unlock_irq(&per_cpu(slab_lock, cpu).lock);
+ free_delayed(&tmp);
+}
+#endif
+
static void drain_cpu_caches(struct kmem_cache *cachep)
{
struct kmem_list3 *l3;
int node;
- on_each_cpu(do_drain, cachep, 1);
+ slab_on_each_cpu(do_drain, cachep);
check_irq_on();
for_each_online_node(node) {
l3 = cachep->nodelists[node];
@@ -2581,10 +2671,10 @@ static int drain_freelist(struct kmem_cache *cache,
nr_freed = 0;
while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
- spin_lock_irq(&l3->list_lock);
+ local_spin_lock_irq(slab_lock, &l3->list_lock);
p = l3->slabs_free.prev;
if (p == &l3->slabs_free) {
- spin_unlock_irq(&l3->list_lock);
+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
goto out;
}
@@ -2598,8 +2688,8 @@ static int drain_freelist(struct kmem_cache *cache,
* to the cache.
*/
l3->free_objects -= cache->num;
- spin_unlock_irq(&l3->list_lock);
- slab_destroy(cache, slabp);
+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ slab_destroy(cache, slabp, false);
nr_freed++;
}
out:
@@ -2893,7 +2983,7 @@ static int cache_grow(struct kmem_cache *cachep,
offset *= cachep->colour_off;
if (local_flags & __GFP_WAIT)
- local_irq_enable();
+ local_unlock_irq(slab_lock);
/*
* The test for missing atomic flag is performed here, rather than
@@ -2923,7 +3013,7 @@ static int cache_grow(struct kmem_cache *cachep,
cache_init_objs(cachep, slabp);
if (local_flags & __GFP_WAIT)
- local_irq_disable();
+ local_lock_irq(slab_lock);
check_irq_off();
spin_lock(&l3->list_lock);
@@ -2934,10 +3024,10 @@ static int cache_grow(struct kmem_cache *cachep,
spin_unlock(&l3->list_lock);
return 1;
opps1:
- kmem_freepages(cachep, objp);
+ kmem_freepages(cachep, objp, false);
failed:
if (local_flags & __GFP_WAIT)
- local_irq_disable();
+ local_lock_irq(slab_lock);
return 0;
}
@@ -3329,11 +3419,11 @@ retry:
* set and go into memory reserves if necessary.
*/
if (local_flags & __GFP_WAIT)
- local_irq_enable();
+ local_unlock_irq(slab_lock);
kmem_flagcheck(cache, flags);
obj = kmem_getpages(cache, local_flags, numa_mem_id());
if (local_flags & __GFP_WAIT)
- local_irq_disable();
+ local_lock_irq(slab_lock);
if (obj) {
/*
* Insert into the appropriate per node queues
@@ -3449,7 +3539,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
return NULL;
cache_alloc_debugcheck_before(cachep, flags);
- local_irq_save(save_flags);
+ local_lock_irqsave(slab_lock, save_flags);
if (nodeid == NUMA_NO_NODE)
nodeid = slab_node;
@@ -3474,7 +3564,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
/* ___cache_alloc_node can fall back to other nodes */
ptr = ____cache_alloc_node(cachep, flags, nodeid);
out:
- local_irq_restore(save_flags);
+ local_unlock_irqrestore(slab_lock, save_flags);
ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,
flags);
@@ -3534,9 +3624,9 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
return NULL;
cache_alloc_debugcheck_before(cachep, flags);
- local_irq_save(save_flags);
+ local_lock_irqsave(slab_lock, save_flags);
objp = __do_cache_alloc(cachep, flags);
- local_irq_restore(save_flags);
+ local_unlock_irqrestore(slab_lock, save_flags);
objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags,
flags);
@@ -3584,7 +3674,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
* a different cache, refer to comments before
* alloc_slabmgmt.
*/
- slab_destroy(cachep, slabp);
+ slab_destroy(cachep, slabp, true);
} else {
list_add(&slabp->list, &l3->slabs_free);
}
@@ -3847,12 +3937,12 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
{
unsigned long flags;
- local_irq_save(flags);
debug_check_no_locks_freed(objp, obj_size(cachep));
if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(objp, obj_size(cachep));
+ local_lock_irqsave(slab_lock, flags);
__cache_free(cachep, objp, __builtin_return_address(0));
- local_irq_restore(flags);
+ unlock_slab_and_free_delayed(flags);
trace_kmem_cache_free(_RET_IP_, objp);
}
@@ -3876,13 +3966,13 @@ void kfree(const void *objp)
if (unlikely(ZERO_OR_NULL_PTR(objp)))
return;
- local_irq_save(flags);
kfree_debugcheck(objp);
c = virt_to_cache(objp);
debug_check_no_locks_freed(objp, obj_size(c));
debug_check_no_obj_freed(objp, obj_size(c));
+ local_lock_irqsave(slab_lock, flags);
__cache_free(c, (void *)objp, __builtin_return_address(0));
- local_irq_restore(flags);
+ unlock_slab_and_free_delayed(flags);
}
EXPORT_SYMBOL(kfree);
@@ -3925,7 +4015,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
if (l3) {
struct array_cache *shared = l3->shared;
- spin_lock_irq(&l3->list_lock);
+ local_spin_lock_irq(slab_lock, &l3->list_lock);
if (shared)
free_block(cachep, shared->entry,
@@ -3938,7 +4028,8 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
}
l3->free_limit = (1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
- spin_unlock_irq(&l3->list_lock);
+ unlock_l3_and_free_delayed(&l3->list_lock);
+
kfree(shared);
free_alien_cache(new_alien);
continue;
@@ -3985,17 +4076,30 @@ struct ccupdate_struct {
struct array_cache *new[0];
};
-static void do_ccupdate_local(void *info)
+static void __do_ccupdate_local(void *info, int cpu)
{
struct ccupdate_struct *new = info;
struct array_cache *old;
- check_irq_off();
- old = cpu_cache_get(new->cachep);
+ old = cpu_cache_get_on_cpu(new->cachep, cpu);
+
+ new->cachep->array[cpu] = new->new[cpu];
+ new->new[cpu] = old;
+}
- new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
- new->new[smp_processor_id()] = old;
+#ifndef CONFIG_PREEMPT_RT_BASE
+static void do_ccupdate_local(void *info)
+{
+ __do_ccupdate_local(info, smp_processor_id());
}
+#else
+static void do_ccupdate_local(void *info, int cpu)
+{
+ spin_lock_irq(&per_cpu(slab_lock, cpu).lock);
+ __do_ccupdate_local(info, cpu);
+ spin_unlock_irq(&per_cpu(slab_lock, cpu).lock);
+}
+#endif
/* Always called with the cache_chain_mutex held */
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
@@ -4021,7 +4125,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
}
new->cachep = cachep;
- on_each_cpu(do_ccupdate_local, (void *)new, 1);
+ slab_on_each_cpu(do_ccupdate_local, (void *)new);
check_irq_on();
cachep->batchcount = batchcount;
@@ -4032,9 +4136,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
struct array_cache *ccold = new->new[i];
if (!ccold)
continue;
- spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
+ local_spin_lock_irq(slab_lock,
+ &cachep->nodelists[cpu_to_mem(i)]->list_lock);
free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
- spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
+
+ unlock_l3_and_free_delayed(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
kfree(ccold);
}
kfree(new);
@@ -4110,7 +4216,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
if (ac->touched && !force) {
ac->touched = 0;
} else {
- spin_lock_irq(&l3->list_lock);
+ local_spin_lock_irq(slab_lock, &l3->list_lock);
if (ac->avail) {
tofree = force ? ac->avail : (ac->limit + 4) / 5;
if (tofree > ac->avail)
@@ -4120,7 +4226,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
memmove(ac->entry, &(ac->entry[tofree]),
sizeof(void *) * ac->avail);
}
- spin_unlock_irq(&l3->list_lock);
+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
}
}
@@ -4259,7 +4365,7 @@ static int s_show(struct seq_file *m, void *p)
continue;
check_irq_on();
- spin_lock_irq(&l3->list_lock);
+ local_spin_lock_irq(slab_lock, &l3->list_lock);
list_for_each_entry(slabp, &l3->slabs_full, list) {
if (slabp->inuse != cachep->num && !error)
@@ -4284,7 +4390,7 @@ static int s_show(struct seq_file *m, void *p)
if (l3->shared)
shared_avail += l3->shared->avail;
- spin_unlock_irq(&l3->list_lock);
+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
}
num_slabs += active_slabs;
num_objs = num_slabs * cachep->num;
@@ -4513,13 +4619,13 @@ static int leaks_show(struct seq_file *m, void *p)
continue;
check_irq_on();
- spin_lock_irq(&l3->list_lock);
+ local_spin_lock_irq(slab_lock, &l3->list_lock);
list_for_each_entry(slabp, &l3->slabs_full, list)
handle_slab(n, cachep, slabp);
list_for_each_entry(slabp, &l3->slabs_partial, list)
handle_slab(n, cachep, slabp);
- spin_unlock_irq(&l3->list_lock);
+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
}
name = cachep->name;
if (n[0] == n[1]) {
diff --git a/mm/swap.c b/mm/swap.c
index 55b266dcfbeb..e3f7d6fa3192 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -31,6 +31,7 @@
#include <linux/backing-dev.h>
#include <linux/memcontrol.h>
#include <linux/gfp.h>
+#include <linux/locallock.h>
#include "internal.h"
@@ -41,6 +42,9 @@ static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
+static DEFINE_LOCAL_IRQ_LOCK(rotate_lock);
+static DEFINE_LOCAL_IRQ_LOCK(swap_lock);
+
/*
* This path almost never happens for VM activity - pages are normally
* freed via pagevecs. But it gets used by networking.
@@ -267,11 +271,11 @@ void rotate_reclaimable_page(struct page *page)
unsigned long flags;
page_cache_get(page);
- local_irq_save(flags);
+ local_lock_irqsave(rotate_lock, flags);
pvec = &__get_cpu_var(lru_rotate_pvecs);
if (!pagevec_add(pvec, page))
pagevec_move_tail(pvec);
- local_irq_restore(flags);
+ local_unlock_irqrestore(rotate_lock, flags);
}
}
@@ -327,12 +331,13 @@ static void activate_page_drain(int cpu)
void activate_page(struct page *page)
{
if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
- struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
+ struct pagevec *pvec = &get_locked_var(swap_lock,
+ activate_page_pvecs);
page_cache_get(page);
if (!pagevec_add(pvec, page))
pagevec_lru_move_fn(pvec, __activate_page, NULL);
- put_cpu_var(activate_page_pvecs);
+ put_locked_var(swap_lock, activate_page_pvecs);
}
}
@@ -373,12 +378,12 @@ EXPORT_SYMBOL(mark_page_accessed);
void __lru_cache_add(struct page *page, enum lru_list lru)
{
- struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];
+ struct pagevec *pvec = &get_locked_var(swap_lock, lru_add_pvecs)[lru];
page_cache_get(page);
if (!pagevec_add(pvec, page))
____pagevec_lru_add(pvec, lru);
- put_cpu_var(lru_add_pvecs);
+ put_locked_var(swap_lock, lru_add_pvecs);
}
EXPORT_SYMBOL(__lru_cache_add);
@@ -512,9 +517,9 @@ static void drain_cpu_pagevecs(int cpu)
unsigned long flags;
/* No harm done if a racing interrupt already did this */
- local_irq_save(flags);
+ local_lock_irqsave(rotate_lock, flags);
pagevec_move_tail(pvec);
- local_irq_restore(flags);
+ local_unlock_irqrestore(rotate_lock, flags);
}
pvec = &per_cpu(lru_deactivate_pvecs, cpu);
@@ -542,18 +547,19 @@ void deactivate_page(struct page *page)
return;
if (likely(get_page_unless_zero(page))) {
- struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+ struct pagevec *pvec = &get_locked_var(swap_lock,
+ lru_deactivate_pvecs);
if (!pagevec_add(pvec, page))
pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
- put_cpu_var(lru_deactivate_pvecs);
+ put_locked_var(swap_lock, lru_deactivate_pvecs);
}
}
void lru_add_drain(void)
{
- drain_cpu_pagevecs(get_cpu());
- put_cpu();
+ drain_cpu_pagevecs(local_lock_cpu(swap_lock));
+ local_unlock_cpu(swap_lock);
}
static void lru_add_drain_per_cpu(struct work_struct *dummy)
@@ -783,6 +789,9 @@ void __init swap_setup(void)
{
unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
+ local_irq_lock_init(rotate_lock);
+ local_irq_lock_init(swap_lock);
+
#ifdef CONFIG_SWAP
bdi_init(swapper_space.backing_dev_info);
#endif
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 27be2f0d4cb7..071155a6f83b 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -782,7 +782,7 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
struct vmap_block *vb;
struct vmap_area *va;
unsigned long vb_idx;
- int node, err;
+ int node, err, cpu;
node = numa_node_id();
@@ -821,12 +821,13 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
BUG_ON(err);
radix_tree_preload_end();
- vbq = &get_cpu_var(vmap_block_queue);
+ cpu = get_cpu_light();
+ vbq = &__get_cpu_var(vmap_block_queue);
vb->vbq = vbq;
spin_lock(&vbq->lock);
list_add_rcu(&vb->free_list, &vbq->free);
spin_unlock(&vbq->lock);
- put_cpu_var(vmap_block_queue);
+ put_cpu_light();
return vb;
}
@@ -900,7 +901,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
struct vmap_block *vb;
unsigned long addr = 0;
unsigned int order;
- int purge = 0;
+ int purge = 0, cpu;
BUG_ON(size & ~PAGE_MASK);
BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
@@ -908,7 +909,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
again:
rcu_read_lock();
- vbq = &get_cpu_var(vmap_block_queue);
+ cpu = get_cpu_light();
+ vbq = &__get_cpu_var(vmap_block_queue);
list_for_each_entry_rcu(vb, &vbq->free, free_list) {
int i;
@@ -945,7 +947,7 @@ next:
if (purge)
purge_fragmented_blocks_thiscpu();
- put_cpu_var(vmap_block_queue);
+ put_cpu_light();
rcu_read_unlock();
if (!addr) {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cb33d9cd4d65..39f9629346a1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1344,8 +1344,8 @@ static int too_many_isolated(struct zone *zone, int file,
*/
static noinline_for_stack void
putback_lru_pages(struct zone *zone, struct scan_control *sc,
- unsigned long nr_anon, unsigned long nr_file,
- struct list_head *page_list)
+ unsigned long nr_anon, unsigned long nr_file,
+ struct list_head *page_list, unsigned long nr_reclaimed)
{
struct page *page;
struct pagevec pvec;
@@ -1356,7 +1356,12 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc,
/*
* Put back any unfreeable pages.
*/
- spin_lock(&zone->lru_lock);
+ spin_lock_irq(&zone->lru_lock);
+
+ if (current_is_kswapd())
+ __count_vm_events(KSWAPD_STEAL, nr_reclaimed);
+ __count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
+
while (!list_empty(page_list)) {
int lru;
page = lru_to_page(page_list);
@@ -1539,12 +1544,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
priority, &nr_dirty, &nr_writeback);
}
- local_irq_disable();
- if (current_is_kswapd())
- __count_vm_events(KSWAPD_STEAL, nr_reclaimed);
- __count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
-
- putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list);
+ putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list, nr_reclaimed);
/*
* If reclaim is isolating dirty pages under writeback, it implies
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 8fd603b1665e..726f0b6b0508 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -216,6 +216,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
long x;
long t;
+ preempt_disable_rt();
x = delta + __this_cpu_read(*p);
t = __this_cpu_read(pcp->stat_threshold);
@@ -225,6 +226,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
x = 0;
}
__this_cpu_write(*p, x);
+ preempt_enable_rt();
}
EXPORT_SYMBOL(__mod_zone_page_state);
@@ -257,6 +259,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
s8 __percpu *p = pcp->vm_stat_diff + item;
s8 v, t;
+ preempt_disable_rt();
v = __this_cpu_inc_return(*p);
t = __this_cpu_read(pcp->stat_threshold);
if (unlikely(v > t)) {
@@ -265,6 +268,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
zone_page_state_add(v + overstep, zone, item);
__this_cpu_write(*p, -overstep);
}
+ preempt_enable_rt();
}
void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -279,6 +283,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
s8 __percpu *p = pcp->vm_stat_diff + item;
s8 v, t;
+ preempt_disable_rt();
v = __this_cpu_dec_return(*p);
t = __this_cpu_read(pcp->stat_threshold);
if (unlikely(v < - t)) {
@@ -287,6 +292,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
zone_page_state_add(v - overstep, zone, item);
__this_cpu_write(*p, overstep);
}
+ preempt_enable_rt();
}
void __dec_zone_page_state(struct page *page, enum zone_stat_item item)