From 42d7395feb56f0655cd8b68e06fc6063823449f8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 11 Dec 2012 16:01:34 -0800 Subject: mm: support more pagesizes for MAP_HUGETLB/SHM_HUGETLB There was some desire in large applications using MAP_HUGETLB or SHM_HUGETLB to use 1GB huge pages on some mappings, and stay with 2MB on others. This is useful together with NUMA policy: use 2MB interleaving on some mappings, but 1GB on local mappings. This patch extends the IPC/SHM syscall interfaces slightly to allow specifying the page size. It borrows some upper bits in the existing flag arguments and allows encoding the log of the desired page size in addition to the *_HUGETLB flag. When 0 is specified the default size is used, this makes the change fully compatible. Extending the internal hugetlb code to handle this is straight forward. Instead of a single mount it just keeps an array of them and selects the right mount based on the specified page size. When no page size is specified it uses the mount of the default page size. The change is not visible in /proc/mounts because internal mounts don't appear there. It also has very little overhead: the additional mounts just consume a super block, but not more memory when not used. I also exported the new flags to the user headers (they were previously under __KERNEL__). Right now only symbols for x86 and some other architecture for 1GB and 2MB are defined. The interface should already work for all other architectures though. Only architectures that define multiple hugetlb sizes actually need it (that is currently x86, tile, powerpc). However tile and powerpc have user configurable hugetlb sizes, so it's not easy to add defines. A program on those architectures would need to query sysfs and use the appropiate log2. [akpm@linux-foundation.org: cleanups] [rientjes@google.com: fix build] [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Andi Kleen Cc: Michael Kerrisk Acked-by: Rik van Riel Acked-by: KAMEZAWA Hiroyuki Cc: Hillf Danton Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index 9a796c41e7d9..ebf19031c5e4 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1153,8 +1153,9 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, * memory so no accounting is necessary */ file = hugetlb_file_setup(HUGETLB_ANON_FILE, addr, len, - VM_NORESERVE, &user, - HUGETLB_ANONHUGE_INODE); + VM_NORESERVE, + &user, HUGETLB_ANONHUGE_INODE, + (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK); if (IS_ERR(file)) return PTR_ERR(file); } -- cgit v1.2.3 From d37371870ceb1d2165397dc36114725b6dca946c Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 11 Dec 2012 16:01:38 -0800 Subject: mm: augment vma rbtree with rb_subtree_gap Define vma->rb_subtree_gap as the largest gap between any vma in the subtree rooted at that vma, and their predecessor. Or, for a recursive definition, vma->rb_subtree_gap is the max of: - vma->vm_start - vma->vm_prev->vm_end - rb_subtree_gap fields of the vmas pointed by vma->rb.rb_left and vma->rb.rb_right This will allow get_unmapped_area_* to find a free area of the right size in O(log(N)) time, instead of potentially having to do a linear walk across all the VMAs. Also define mm->highest_vm_end as the vm_end field of the highest vma, so that we can easily check if the following gap is suitable. This does have the potential to make unmapping VMAs more expensive, especially for processes with very large numbers of VMAs, where the VMA rbtree can grow quite deep. Signed-off-by: Michel Lespinasse Reviewed-by: Rik van Riel Cc: Hugh Dickins Cc: Russell King Cc: Ralf Baechle Cc: Paul Mundt Cc: "David S. Miller" Cc: Chris Metcalf Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 143 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 132 insertions(+), 11 deletions(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index ebf19031c5e4..bdcea6310fff 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -297,6 +298,27 @@ out: return retval; } +static long vma_compute_subtree_gap(struct vm_area_struct *vma) +{ + unsigned long max, subtree_gap; + max = vma->vm_start; + if (vma->vm_prev) + max -= vma->vm_prev->vm_end; + if (vma->vm_rb.rb_left) { + subtree_gap = rb_entry(vma->vm_rb.rb_left, + struct vm_area_struct, vm_rb)->rb_subtree_gap; + if (subtree_gap > max) + max = subtree_gap; + } + if (vma->vm_rb.rb_right) { + subtree_gap = rb_entry(vma->vm_rb.rb_right, + struct vm_area_struct, vm_rb)->rb_subtree_gap; + if (subtree_gap > max) + max = subtree_gap; + } + return max; +} + #ifdef CONFIG_DEBUG_VM_RB static int browse_rb(struct rb_root *root) { @@ -327,6 +349,18 @@ static int browse_rb(struct rb_root *root) return i; } +static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore) +{ + struct rb_node *nd; + + for (nd = rb_first(root); nd; nd = rb_next(nd)) { + struct vm_area_struct *vma; + vma = rb_entry(nd, struct vm_area_struct, vm_rb); + BUG_ON(vma != ignore && + vma->rb_subtree_gap != vma_compute_subtree_gap(vma)); + } +} + void validate_mm(struct mm_struct *mm) { int bug = 0; @@ -349,9 +383,52 @@ void validate_mm(struct mm_struct *mm) BUG_ON(bug); } #else +#define validate_mm_rb(root, ignore) do { } while (0) #define validate_mm(mm) do { } while (0) #endif +RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb, + unsigned long, rb_subtree_gap, vma_compute_subtree_gap) + +/* + * Update augmented rbtree rb_subtree_gap values after vma->vm_start or + * vma->vm_prev->vm_end values changed, without modifying the vma's position + * in the rbtree. + */ +static void vma_gap_update(struct vm_area_struct *vma) +{ + /* + * As it turns out, RB_DECLARE_CALLBACKS() already created a callback + * function that does exacltly what we want. + */ + vma_gap_callbacks_propagate(&vma->vm_rb, NULL); +} + +static inline void vma_rb_insert(struct vm_area_struct *vma, + struct rb_root *root) +{ + /* All rb_subtree_gap values must be consistent prior to insertion */ + validate_mm_rb(root, NULL); + + rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks); +} + +static void vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root) +{ + /* + * All rb_subtree_gap values must be consistent prior to erase, + * with the possible exception of the vma being erased. + */ + validate_mm_rb(root, vma); + + /* + * Note rb_erase_augmented is a fairly large inline function, + * so make sure we instantiate it only once with our desired + * augmented rbtree callbacks. + */ + rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks); +} + /* * vma has some anon_vma assigned, and is already inserted on that * anon_vma's interval trees. @@ -421,8 +498,25 @@ static int find_vma_links(struct mm_struct *mm, unsigned long addr, void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, struct rb_node **rb_link, struct rb_node *rb_parent) { + /* Update tracking information for the gap following the new vma. */ + if (vma->vm_next) + vma_gap_update(vma->vm_next); + else + mm->highest_vm_end = vma->vm_end; + + /* + * vma->vm_prev wasn't known when we followed the rbtree to find the + * correct insertion point for that vma. As a result, we could not + * update the vma vm_rb parents rb_subtree_gap values on the way down. + * So, we first insert the vma with a zero rb_subtree_gap value + * (to be consistent with what we did on the way down), and then + * immediately update the gap to the correct value. Finally we + * rebalance the rbtree after all augmented values have been set. + */ rb_link_node(&vma->vm_rb, rb_parent, rb_link); - rb_insert_color(&vma->vm_rb, &mm->mm_rb); + vma->rb_subtree_gap = 0; + vma_gap_update(vma); + vma_rb_insert(vma, &mm->mm_rb); } static void __vma_link_file(struct vm_area_struct *vma) @@ -498,12 +592,12 @@ static inline void __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev) { - struct vm_area_struct *next = vma->vm_next; + struct vm_area_struct *next; - prev->vm_next = next; + vma_rb_erase(vma, &mm->mm_rb); + prev->vm_next = next = vma->vm_next; if (next) next->vm_prev = prev; - rb_erase(&vma->vm_rb, &mm->mm_rb); if (mm->mmap_cache == vma) mm->mmap_cache = prev; } @@ -525,6 +619,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start, struct rb_root *root = NULL; struct anon_vma *anon_vma = NULL; struct file *file = vma->vm_file; + bool start_changed = false, end_changed = false; long adjust_next = 0; int remove_next = 0; @@ -615,8 +710,14 @@ again: remove_next = 1 + (end > next->vm_end); vma_interval_tree_remove(next, root); } - vma->vm_start = start; - vma->vm_end = end; + if (start != vma->vm_start) { + vma->vm_start = start; + start_changed = true; + } + if (end != vma->vm_end) { + vma->vm_end = end; + end_changed = true; + } vma->vm_pgoff = pgoff; if (adjust_next) { next->vm_start += adjust_next << PAGE_SHIFT; @@ -645,6 +746,15 @@ again: remove_next = 1 + (end > next->vm_end); * (it may either follow vma or precede it). */ __insert_vm_struct(mm, insert); + } else { + if (start_changed) + vma_gap_update(vma); + if (end_changed) { + if (!next) + mm->highest_vm_end = end; + else if (!adjust_next) + vma_gap_update(next); + } } if (anon_vma) { @@ -678,10 +788,13 @@ again: remove_next = 1 + (end > next->vm_end); * we must remove another next too. It would clutter * up the code too much to do both in one go. */ - if (remove_next == 2) { - next = vma->vm_next; + next = vma->vm_next; + if (remove_next == 2) goto again; - } + else if (next) + vma_gap_update(next); + else + mm->highest_vm_end = end; } if (insert && file) uprobe_mmap(insert); @@ -1784,6 +1897,10 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) anon_vma_interval_tree_pre_update_vma(vma); vma->vm_end = address; anon_vma_interval_tree_post_update_vma(vma); + if (vma->vm_next) + vma_gap_update(vma->vm_next); + else + vma->vm_mm->highest_vm_end = address; perf_event_mmap(vma); } } @@ -1838,6 +1955,7 @@ int expand_downwards(struct vm_area_struct *vma, vma->vm_start = address; vma->vm_pgoff -= grow; anon_vma_interval_tree_post_update_vma(vma); + vma_gap_update(vma); perf_event_mmap(vma); } } @@ -1960,14 +2078,17 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, insertion_point = (prev ? &prev->vm_next : &mm->mmap); vma->vm_prev = NULL; do { - rb_erase(&vma->vm_rb, &mm->mm_rb); + vma_rb_erase(vma, &mm->mm_rb); mm->map_count--; tail_vma = vma; vma = vma->vm_next; } while (vma && vma->vm_start < end); *insertion_point = vma; - if (vma) + if (vma) { vma->vm_prev = prev; + vma_gap_update(vma); + } else + mm->highest_vm_end = prev ? prev->vm_end : 0; tail_vma->vm_next = NULL; if (mm->unmap_area == arch_unmap_area) addr = prev ? prev->vm_end : mm->mmap_base; -- cgit v1.2.3 From 5a0768f641a5bad844860e67250baf0d1aa5e03c Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 11 Dec 2012 16:01:42 -0800 Subject: mm: check rb_subtree_gap correctness When CONFIG_DEBUG_VM_RB is enabled, check that rb_subtree_gap is correctly set for every vma and that mm->highest_vm_end is also correct. Also add an explicit 'bug' variable to track if browse_rb() detected any invalid condition. [akpm@linux-foundation.org: repair innovative coding-style inventions] Signed-off-by: Michel Lespinasse Reviewed-by: Rik van Riel Cc: Hugh Dickins Cc: Russell King Cc: Ralf Baechle Cc: Paul Mundt Cc: "David S. Miller" Cc: Chris Metcalf Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 53 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 14 deletions(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index bdcea6310fff..ff93f6c8436c 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -322,31 +322,45 @@ static long vma_compute_subtree_gap(struct vm_area_struct *vma) #ifdef CONFIG_DEBUG_VM_RB static int browse_rb(struct rb_root *root) { - int i = 0, j; + int i = 0, j, bug = 0; struct rb_node *nd, *pn = NULL; unsigned long prev = 0, pend = 0; for (nd = rb_first(root); nd; nd = rb_next(nd)) { struct vm_area_struct *vma; vma = rb_entry(nd, struct vm_area_struct, vm_rb); - if (vma->vm_start < prev) - printk("vm_start %lx prev %lx\n", vma->vm_start, prev), i = -1; - if (vma->vm_start < pend) + if (vma->vm_start < prev) { + printk("vm_start %lx prev %lx\n", vma->vm_start, prev); + bug = 1; + } + if (vma->vm_start < pend) { printk("vm_start %lx pend %lx\n", vma->vm_start, pend); - if (vma->vm_start > vma->vm_end) - printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start); + bug = 1; + } + if (vma->vm_start > vma->vm_end) { + printk("vm_end %lx < vm_start %lx\n", + vma->vm_end, vma->vm_start); + bug = 1; + } + if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) { + printk("free gap %lx, correct %lx\n", + vma->rb_subtree_gap, + vma_compute_subtree_gap(vma)); + bug = 1; + } i++; pn = nd; prev = vma->vm_start; pend = vma->vm_end; } j = 0; - for (nd = pn; nd; nd = rb_prev(nd)) { + for (nd = pn; nd; nd = rb_prev(nd)) j++; + if (i != j) { + printk("backwards %d, forwards %d\n", j, i); + bug = 1; } - if (i != j) - printk("backwards %d, forwards %d\n", j, i), i = 0; - return i; + return bug ? -1 : i; } static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore) @@ -365,6 +379,7 @@ void validate_mm(struct mm_struct *mm) { int bug = 0; int i = 0; + unsigned long highest_address = 0; struct vm_area_struct *vma = mm->mmap; while (vma) { struct anon_vma_chain *avc; @@ -372,14 +387,24 @@ void validate_mm(struct mm_struct *mm) list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) anon_vma_interval_tree_verify(avc); vma_unlock_anon_vma(vma); + highest_address = vma->vm_end; vma = vma->vm_next; i++; } - if (i != mm->map_count) - printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1; + if (i != mm->map_count) { + printk("map_count %d vm_next %d\n", mm->map_count, i); + bug = 1; + } + if (highest_address != mm->highest_vm_end) { + printk("mm->highest_vm_end %lx, found %lx\n", + mm->highest_vm_end, highest_address); + bug = 1; + } i = browse_rb(&mm->mm_rb); - if (i != mm->map_count) - printk("map_count %d rb %d\n", mm->map_count, i), bug = 1; + if (i != mm->map_count) { + printk("map_count %d rb %d\n", mm->map_count, i); + bug = 1; + } BUG_ON(bug); } #else -- cgit v1.2.3 From db4fbfb9523c93583c339e66023506f651c1d54b Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 11 Dec 2012 16:01:49 -0800 Subject: mm: vm_unmapped_area() lookup function Implement vm_unmapped_area() using the rb_subtree_gap and highest_vm_end information to look up for suitable virtual address space gaps. struct vm_unmapped_area_info is used to define the desired allocation request: - lowest or highest possible address matching the remaining constraints - desired gap length - low/high address limits that the gap must fit into - alignment mask and offset Also update the generic arch_get_unmapped_area[_topdown] functions to make use of vm_unmapped_area() instead of implementing a brute force search. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Michel Lespinasse Reviewed-by: Rik van Riel Cc: Hugh Dickins Cc: Russell King Cc: Ralf Baechle Cc: Paul Mundt Cc: "David S. Miller" Cc: Chris Metcalf Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 312 ++++++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 222 insertions(+), 90 deletions(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index ff93f6c8436c..5646677a96d5 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1539,6 +1539,206 @@ unacct_error: return error; } +unsigned long unmapped_area(struct vm_unmapped_area_info *info) +{ + /* + * We implement the search by looking for an rbtree node that + * immediately follows a suitable gap. That is, + * - gap_start = vma->vm_prev->vm_end <= info->high_limit - length; + * - gap_end = vma->vm_start >= info->low_limit + length; + * - gap_end - gap_start >= length + */ + + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long length, low_limit, high_limit, gap_start, gap_end; + + /* Adjust search length to account for worst case alignment overhead */ + length = info->length + info->align_mask; + if (length < info->length) + return -ENOMEM; + + /* Adjust search limits by the desired length */ + if (info->high_limit < length) + return -ENOMEM; + high_limit = info->high_limit - length; + + if (info->low_limit > high_limit) + return -ENOMEM; + low_limit = info->low_limit + length; + + /* Check if rbtree root looks promising */ + if (RB_EMPTY_ROOT(&mm->mm_rb)) + goto check_highest; + vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb); + if (vma->rb_subtree_gap < length) + goto check_highest; + + while (true) { + /* Visit left subtree if it looks promising */ + gap_end = vma->vm_start; + if (gap_end >= low_limit && vma->vm_rb.rb_left) { + struct vm_area_struct *left = + rb_entry(vma->vm_rb.rb_left, + struct vm_area_struct, vm_rb); + if (left->rb_subtree_gap >= length) { + vma = left; + continue; + } + } + + gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; +check_current: + /* Check if current node has a suitable gap */ + if (gap_start > high_limit) + return -ENOMEM; + if (gap_end >= low_limit && gap_end - gap_start >= length) + goto found; + + /* Visit right subtree if it looks promising */ + if (vma->vm_rb.rb_right) { + struct vm_area_struct *right = + rb_entry(vma->vm_rb.rb_right, + struct vm_area_struct, vm_rb); + if (right->rb_subtree_gap >= length) { + vma = right; + continue; + } + } + + /* Go back up the rbtree to find next candidate node */ + while (true) { + struct rb_node *prev = &vma->vm_rb; + if (!rb_parent(prev)) + goto check_highest; + vma = rb_entry(rb_parent(prev), + struct vm_area_struct, vm_rb); + if (prev == vma->vm_rb.rb_left) { + gap_start = vma->vm_prev->vm_end; + gap_end = vma->vm_start; + goto check_current; + } + } + } + +check_highest: + /* Check highest gap, which does not precede any rbtree node */ + gap_start = mm->highest_vm_end; + gap_end = ULONG_MAX; /* Only for VM_BUG_ON below */ + if (gap_start > high_limit) + return -ENOMEM; + +found: + /* We found a suitable gap. Clip it with the original low_limit. */ + if (gap_start < info->low_limit) + gap_start = info->low_limit; + + /* Adjust gap address to the desired alignment */ + gap_start += (info->align_offset - gap_start) & info->align_mask; + + VM_BUG_ON(gap_start + info->length > info->high_limit); + VM_BUG_ON(gap_start + info->length > gap_end); + return gap_start; +} + +unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long length, low_limit, high_limit, gap_start, gap_end; + + /* Adjust search length to account for worst case alignment overhead */ + length = info->length + info->align_mask; + if (length < info->length) + return -ENOMEM; + + /* + * Adjust search limits by the desired length. + * See implementation comment at top of unmapped_area(). + */ + gap_end = info->high_limit; + if (gap_end < length) + return -ENOMEM; + high_limit = gap_end - length; + + if (info->low_limit > high_limit) + return -ENOMEM; + low_limit = info->low_limit + length; + + /* Check highest gap, which does not precede any rbtree node */ + gap_start = mm->highest_vm_end; + if (gap_start <= high_limit) + goto found_highest; + + /* Check if rbtree root looks promising */ + if (RB_EMPTY_ROOT(&mm->mm_rb)) + return -ENOMEM; + vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb); + if (vma->rb_subtree_gap < length) + return -ENOMEM; + + while (true) { + /* Visit right subtree if it looks promising */ + gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; + if (gap_start <= high_limit && vma->vm_rb.rb_right) { + struct vm_area_struct *right = + rb_entry(vma->vm_rb.rb_right, + struct vm_area_struct, vm_rb); + if (right->rb_subtree_gap >= length) { + vma = right; + continue; + } + } + +check_current: + /* Check if current node has a suitable gap */ + gap_end = vma->vm_start; + if (gap_end < low_limit) + return -ENOMEM; + if (gap_start <= high_limit && gap_end - gap_start >= length) + goto found; + + /* Visit left subtree if it looks promising */ + if (vma->vm_rb.rb_left) { + struct vm_area_struct *left = + rb_entry(vma->vm_rb.rb_left, + struct vm_area_struct, vm_rb); + if (left->rb_subtree_gap >= length) { + vma = left; + continue; + } + } + + /* Go back up the rbtree to find next candidate node */ + while (true) { + struct rb_node *prev = &vma->vm_rb; + if (!rb_parent(prev)) + return -ENOMEM; + vma = rb_entry(rb_parent(prev), + struct vm_area_struct, vm_rb); + if (prev == vma->vm_rb.rb_right) { + gap_start = vma->vm_prev ? + vma->vm_prev->vm_end : 0; + goto check_current; + } + } + } + +found: + /* We found a suitable gap. Clip it with the original high_limit. */ + if (gap_end > info->high_limit) + gap_end = info->high_limit; + +found_highest: + /* Compute highest gap address at the desired alignment */ + gap_end -= info->length; + gap_end -= (gap_end - info->align_offset) & info->align_mask; + + VM_BUG_ON(gap_end < info->low_limit); + VM_BUG_ON(gap_end < gap_start); + return gap_end; +} + /* Get an address range which is currently unmapped. * For shmat() with addr=0. * @@ -1557,7 +1757,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long start_addr; + struct vm_unmapped_area_info info; if (len > TASK_SIZE) return -ENOMEM; @@ -1572,40 +1772,13 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, (!vma || addr + len <= vma->vm_start)) return addr; } - if (len > mm->cached_hole_size) { - start_addr = addr = mm->free_area_cache; - } else { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - } -full_search: - for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (TASK_SIZE - len < addr) { - /* - * Start a new search - just in case we missed - * some holes. - */ - if (start_addr != TASK_UNMAPPED_BASE) { - addr = TASK_UNMAPPED_BASE; - start_addr = addr; - mm->cached_hole_size = 0; - goto full_search; - } - return -ENOMEM; - } - if (!vma || addr + len <= vma->vm_start) { - /* - * Remember the place where we stopped the search: - */ - mm->free_area_cache = addr + len; - return addr; - } - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - addr = vma->vm_end; - } + info.flags = 0; + info.length = len; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + info.align_mask = 0; + return vm_unmapped_area(&info); } #endif @@ -1630,7 +1803,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, { struct vm_area_struct *vma; struct mm_struct *mm = current->mm; - unsigned long addr = addr0, start_addr; + unsigned long addr = addr0; + struct vm_unmapped_area_info info; /* requested length too big for entire address space */ if (len > TASK_SIZE) @@ -1648,53 +1822,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, return addr; } - /* check if free_area_cache is useful for us */ - if (len <= mm->cached_hole_size) { - mm->cached_hole_size = 0; - mm->free_area_cache = mm->mmap_base; - } - -try_again: - /* either no address requested or can't fit in requested address hole */ - start_addr = addr = mm->free_area_cache; - - if (addr < len) - goto fail; - - addr -= len; - do { - /* - * Lookup failure means no vma is above this address, - * else if new region fits below vma->vm_start, - * return with success: - */ - vma = find_vma(mm, addr); - if (!vma || addr+len <= vma->vm_start) - /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr); - - /* remember the largest hole we saw so far */ - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - - /* try just below the current vma->vm_start */ - addr = vma->vm_start-len; - } while (len < vma->vm_start); - -fail: - /* - * if hint left us with no space for the requested - * mapping then try again: - * - * Note: this is different with the case of bottomup - * which does the fully line-search, but we use find_vma - * here that causes some holes skipped. - */ - if (start_addr != mm->mmap_base) { - mm->free_area_cache = mm->mmap_base; - mm->cached_hole_size = 0; - goto try_again; - } + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = PAGE_SIZE; + info.high_limit = mm->mmap_base; + info.align_mask = 0; + addr = vm_unmapped_area(&info); /* * A failed mmap() very likely causes application failure, @@ -1702,14 +1835,13 @@ fail: * can happen with large stack limits and large mmap() * allocations. */ - mm->cached_hole_size = ~0UL; - mm->free_area_cache = TASK_UNMAPPED_BASE; - addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); - /* - * Restore the topdown base: - */ - mm->free_area_cache = mm->mmap_base; - mm->cached_hole_size = ~0UL; + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + addr = vm_unmapped_area(&info); + } return addr; } -- cgit v1.2.3