From 5a505085f043e8380f83610f79642853c051e2f1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 2 Dec 2012 19:56:46 +0000 Subject: mm/rmap: Convert the struct anon_vma::mutex to an rwsem Convert the struct anon_vma::mutex to an rwsem, which will help in solving a page-migration scalability problem. (Addressed in a separate patch.) The conversion is simple and straightforward: in every case where we mutex_lock()ed we'll now down_write(). Suggested-by: Linus Torvalds Reviewed-by: Rik van Riel Cc: Peter Zijlstra Cc: Paul Turner Cc: Lee Schermerhorn Cc: Christoph Lameter Cc: Mel Gorman Cc: Andrea Arcangeli Cc: Johannes Weiner Cc: Hugh Dickins Signed-off-by: Ingo Molnar Signed-off-by: Mel Gorman --- mm/mmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index 9a796c41e7d9..88408632da66 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2561,15 +2561,15 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma) * The LSB of head.next can't change from under us * because we hold the mm_all_locks_mutex. */ - mutex_lock_nest_lock(&anon_vma->root->mutex, &mm->mmap_sem); + down_write(&anon_vma->root->rwsem); /* * We can safely modify head.next after taking the - * anon_vma->root->mutex. If some other vma in this mm shares + * anon_vma->root->rwsem. If some other vma in this mm shares * the same anon_vma we won't take it again. * * No need of atomic instructions here, head.next * can't change from under us thanks to the - * anon_vma->root->mutex. + * anon_vma->root->rwsem. */ if (__test_and_set_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) @@ -2671,7 +2671,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma) * * No need of atomic instructions here, head.next * can't change from under us until we release the - * anon_vma->root->mutex. + * anon_vma->root->rwsem. 
*/ if (!__test_and_clear_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) -- cgit v1.2.3 From 4fc3f1d66b1ef0d7b8dc11f4ff1cc510f78b37d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 2 Dec 2012 19:56:50 +0000 Subject: mm/rmap, migration: Make rmap_walk_anon() and try_to_unmap_anon() more scalable rmap_walk_anon() and try_to_unmap_anon() appear to be too careful about locking the anon vma: while they need protection against anon vma list modifications, they do not need exclusive access to the list itself. Transforming this exclusive lock to a read-locked rwsem removes a global lock from the hot path of page-migration intense threaded workloads which can cause pathological performance like this: 96.43% process 0 [kernel.kallsyms] [k] perf_trace_sched_switch | --- perf_trace_sched_switch __schedule schedule schedule_preempt_disabled __mutex_lock_common.isra.6 __mutex_lock_slowpath mutex_lock | |--50.61%-- rmap_walk | move_to_new_page | migrate_pages | migrate_misplaced_page | __do_numa_page.isra.69 | handle_pte_fault | handle_mm_fault | __do_page_fault | do_page_fault | page_fault | __memset_sse2 | | | --100.00%-- worker_thread | | | --100.00%-- start_thread | --49.39%-- page_lock_anon_vma try_to_unmap_anon try_to_unmap migrate_pages migrate_misplaced_page __do_numa_page.isra.69 handle_pte_fault handle_mm_fault __do_page_fault do_page_fault page_fault __memset_sse2 | --100.00%-- worker_thread start_thread With this change applied the profile is now nicely flat and there's no anon-vma related scheduling/blocking. Rename anon_vma_[un]lock() => anon_vma_[un]lock_write(), to make it clearer that it's an exclusive write-lock in that case - suggested by Rik van Riel. 
Suggested-by: Linus Torvalds Cc: Peter Zijlstra Cc: Paul Turner Cc: Lee Schermerhorn Cc: Christoph Lameter Cc: Rik van Riel Cc: Mel Gorman Cc: Andrea Arcangeli Cc: Johannes Weiner Cc: Hugh Dickins Signed-off-by: Ingo Molnar Signed-off-by: Mel Gorman --- mm/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index 88408632da66..68a16b40c209 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -602,7 +602,7 @@ again: remove_next = 1 + (end > next->vm_end); if (anon_vma) { VM_BUG_ON(adjust_next && next->anon_vma && anon_vma != next->anon_vma); - anon_vma_lock(anon_vma); + anon_vma_lock_write(anon_vma); anon_vma_interval_tree_pre_update_vma(vma); if (adjust_next) anon_vma_interval_tree_pre_update_vma(next); -- cgit v1.2.3