summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorKOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>2009-04-28 22:48:11 +0200
committerGreg Kroah-Hartman <gregkh@suse.de>2009-05-08 15:45:11 -0700
commit28441fac78d703b12b649eaf285576646a9f8b8f (patch)
tree9123d09cc4e821d4d53a30c8cce53eb9f14896e1 /mm
parentea20dec21f7b8ddd40b9ab307f3c7e9a2b00fb84 (diff)
mm: fix Committed_AS underflow on large NR_CPUS environment
commit 00a62ce91e554198ef28234c91c36f850f5a3bc9 upstream The Committed_AS field can underflow in certain situations: > # while true; do cat /proc/meminfo | grep _AS; sleep 1; done | uniq -c > 1 Committed_AS: 18446744073709323392 kB > 11 Committed_AS: 18446744073709455488 kB > 6 Committed_AS: 35136 kB > 5 Committed_AS: 18446744073709454400 kB > 7 Committed_AS: 35904 kB > 3 Committed_AS: 18446744073709453248 kB > 2 Committed_AS: 34752 kB > 9 Committed_AS: 18446744073709453248 kB > 8 Committed_AS: 34752 kB > 3 Committed_AS: 18446744073709320960 kB > 7 Committed_AS: 18446744073709454080 kB > 3 Committed_AS: 18446744073709320960 kB > 5 Committed_AS: 18446744073709454080 kB > 6 Committed_AS: 18446744073709320960 kB Because NR_CPUS can be greater than 1000 and meminfo_proc_show() does not check for underflow. But NR_CPUS proportional isn't good calculation. In general, possibility of lock contention is proportional to the number of online cpus, not theorical maximum cpus (NR_CPUS). The current kernel has generic percpu-counter stuff. using it is right way. it makes code simplify and percpu_counter_read_positive() don't make underflow issue. Reported-by: Dave Hansen <dave@linux.vnet.ibm.com> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Eric B Munson <ebmunson@us.ibm.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Christoph Lameter <cl@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Diffstat (limited to 'mm')
-rw-r--r--mm/mmap.c12
-rw-r--r--mm/nommu.c13
-rw-r--r--mm/swap.c46
3 files changed, 13 insertions, 58 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index f1aa6f92bc8a..efff81b133e6 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -84,7 +84,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
int sysctl_overcommit_ratio = 50; /* default is 50% */
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
+struct percpu_counter vm_committed_as;
/*
* Check that a process has enough memory to allocate a new virtual
@@ -178,11 +178,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
if (mm)
allowed -= mm->total_vm / 32;
- /*
- * cast `allowed' as a signed long because vm_committed_space
- * sometimes has a negative value
- */
- if (atomic_long_read(&vm_committed_space) < (long)allowed)
+ if (percpu_counter_read_positive(&vm_committed_as) < allowed)
return 0;
error:
vm_unacct_memory(pages);
@@ -2477,6 +2473,10 @@ void mm_drop_all_locks(struct mm_struct *mm)
*/
void __init mmap_init(void)
{
+ int ret;
+
+ ret = percpu_counter_init(&vm_committed_as, 0);
+ VM_BUG_ON(ret);
vm_area_cachep = kmem_cache_create("vm_area_struct",
sizeof(struct vm_area_struct), 0,
SLAB_PANIC, NULL);
diff --git a/mm/nommu.c b/mm/nommu.c
index 2fcf47d449b4..ee955bc27845 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -62,7 +62,7 @@ void *high_memory;
struct page *mem_map;
unsigned long max_mapnr;
unsigned long num_physpages;
-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
+struct percpu_counter vm_committed_as;
int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
int sysctl_overcommit_ratio = 50; /* default is 50% */
int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
@@ -463,6 +463,10 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
*/
void __init mmap_init(void)
{
+ int ret;
+
+ ret = percpu_counter_init(&vm_committed_as, 0);
+ VM_BUG_ON(ret);
vm_region_jar = kmem_cache_create("vm_region_jar",
sizeof(struct vm_region), 0,
SLAB_PANIC, NULL);
@@ -1849,12 +1853,9 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
if (mm)
allowed -= mm->total_vm / 32;
- /*
- * cast `allowed' as a signed long because vm_committed_space
- * sometimes has a negative value
- */
- if (atomic_long_read(&vm_committed_space) < (long)allowed)
+ if (percpu_counter_read_positive(&vm_committed_as) < allowed)
return 0;
+
error:
vm_unacct_memory(pages);
diff --git a/mm/swap.c b/mm/swap.c
index 8adb9feb61e1..2460f7de91f7 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -514,49 +514,6 @@ unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
EXPORT_SYMBOL(pagevec_lookup_tag);
-#ifdef CONFIG_SMP
-/*
- * We tolerate a little inaccuracy to avoid ping-ponging the counter between
- * CPUs
- */
-#define ACCT_THRESHOLD max(16, NR_CPUS * 2)
-
-static DEFINE_PER_CPU(long, committed_space);
-
-void vm_acct_memory(long pages)
-{
- long *local;
-
- preempt_disable();
- local = &__get_cpu_var(committed_space);
- *local += pages;
- if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
- atomic_long_add(*local, &vm_committed_space);
- *local = 0;
- }
- preempt_enable();
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/* Drop the CPU's cached committed space back into the central pool. */
-static int cpu_swap_callback(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
-{
- long *committed;
-
- committed = &per_cpu(committed_space, (long)hcpu);
- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
- atomic_long_add(*committed, &vm_committed_space);
- *committed = 0;
- drain_cpu_pagevecs((long)hcpu);
- }
- return NOTIFY_OK;
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-#endif /* CONFIG_SMP */
-
/*
* Perform any setup for the swap system
*/
@@ -577,7 +534,4 @@ void __init swap_setup(void)
* Right now other parts of the system means that we
* _really_ don't want to cluster much more
*/
-#ifdef CONFIG_HOTPLUG_CPU
- hotcpu_notifier(cpu_swap_callback, 0);
-#endif
}