Diffstat (limited to 'mm/userfaultfd.c')
-rw-r--r--  mm/userfaultfd.c | 74
1 file changed, 58 insertions(+), 16 deletions(-)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index a0817cc470b0..1e5c2f94e8a3 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -154,6 +154,8 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
unsigned long len,
bool zeropage)
{
+ int vm_alloc_shared = dst_vma->vm_flags & VM_SHARED;
+ int vm_shared = dst_vma->vm_flags & VM_SHARED;
ssize_t err;
pte_t *dst_pte;
unsigned long src_addr, dst_addr;
@@ -204,14 +206,14 @@ retry:
goto out_unlock;
/*
- * Make sure the vma is not shared, that the remaining dst
- * range is both valid and fully within a single existing vma.
+ * Make sure the remaining dst range is both valid and
+ * fully within a single existing vma.
*/
- if (dst_vma->vm_flags & VM_SHARED)
- goto out_unlock;
if (dst_start < dst_vma->vm_start ||
dst_start + len > dst_vma->vm_end)
goto out_unlock;
+
+ vm_shared = dst_vma->vm_flags & VM_SHARED;
}
if (WARN_ON(dst_addr & (vma_hpagesize - 1) ||
@@ -225,11 +227,13 @@ retry:
goto out_unlock;
/*
- * Ensure the dst_vma has a anon_vma.
+ * If not shared, ensure the dst_vma has an anon_vma.
*/
err = -ENOMEM;
- if (unlikely(anon_vma_prepare(dst_vma)))
- goto out_unlock;
+ if (!vm_shared) {
+ if (unlikely(anon_vma_prepare(dst_vma)))
+ goto out_unlock;
+ }
h = hstate_vma(dst_vma);
@@ -266,6 +270,7 @@ retry:
dst_addr, src_addr, &page);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ vm_alloc_shared = vm_shared;
cond_resched();
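
The assignment above latches the mapping type of the vma the page was just allocated for. A minimal userspace-compilable sketch of that snapshot pattern (purely hypothetical names such as region and alloc_shared; this is not kernel code): the current flag can be recomputed whenever the range is revalidated after the lock is dropped, while the latched copy stays usable on the error path even after the vma pointer has changed or become NULL, as the comment in the next hunk notes.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct region { bool shared; };

int main(void)
{
	struct region r = { .shared = true };
	struct region *cur = &r;

	bool current_shared = cur->shared;    /* tracks the vma currently looked up */
	bool alloc_shared   = current_shared; /* latched once a page is allocated   */

	/* retry path: the lock is dropped and the region may change or vanish */
	cur = NULL;

	/* error path: must not dereference cur; only the latched flag is safe */
	printf("free page as a %s allocation\n", alloc_shared ? "shared" : "private");
	return 0;
}
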
@@ -305,18 +310,49 @@ out:
if (page) {
/*
* We encountered an error and are about to free a newly
- * allocated huge page. It is possible that there was a
- * reservation associated with the page that has been
- * consumed. See the routine restore_reserve_on_error
- * for details. Unfortunately, we can not call
- * restore_reserve_on_error now as it would require holding
- * mmap_sem. Clear the PagePrivate flag so that the global
+ * allocated huge page.
+ *
+ * Reservation handling is very subtle, and is different for
+ * private and shared mappings. See the routine
+ * restore_reserve_on_error for details. Unfortunately, we
+ * can not call restore_reserve_on_error now as it would
+ * require holding mmap_sem.
+ *
+ * If a reservation for the page existed in the reservation
+ * map of a private mapping, the map was modified to indicate
+ * the reservation was consumed when the page was allocated.
+ * We clear the PagePrivate flag now so that the global
* reserve count will not be incremented in free_huge_page.
* The reservation map will still indicate the reservation
* was consumed and possibly prevent later page allocation.
- * This is better than leaking a global reservation.
+ * This is better than leaking a global reservation. If no
+ * reservation existed, it is still safe to clear PagePrivate
+ * as no adjustments to reservation counts were made during
+ * allocation.
+ *
+ * The reservation map for shared mappings indicates which
+ * pages have reservations. When a huge page is allocated
+ * for an address with a reservation, no change is made to
+ * the reserve map. In this case PagePrivate will be set
+ * to indicate that the global reservation count should be
+ * incremented when the page is freed. This is the desired
+ * behavior. However, when a huge page is allocated for an
+ * address without a reservation, a reservation entry is added
+ * to the reservation map, and PagePrivate will not be set.
+ * When the page is freed, the global reserve count will NOT
+ * be incremented and it will appear as though we have leaked a
+ * reserved page. In this case, set PagePrivate so that the
+ * global reserve count will be incremented to match the
+ * reservation map entry which was created.
+ *
+ * Note that vm_alloc_shared is based on the flags of the vma
+ * for which the page was originally allocated. dst_vma could
+ * be different or NULL on error.
*/
- ClearPagePrivate(page);
+ if (vm_alloc_shared)
+ SetPagePrivate(page);
+ else
+ ClearPagePrivate(page);
put_page(page);
}
BUG_ON(copied < 0);
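
The long comment above encodes a four-way case analysis. As a compilable aside, the table can be written out explicitly; all names below are illustrative rather than kernel API, apart from quoting the SetPagePrivate/ClearPagePrivate flag operations used in the hunk.

#include <stdbool.h>
#include <stdio.h>

/*
 * The four cases from the comment above. PagePrivate set on a huge page
 * means the global reserve count is incremented when the page is freed.
 */
struct reserve_case {
	bool shared;        /* mapping type of the vma the page was allocated for */
	bool had_reserve;   /* did the reserve map already cover this address?    */
	bool set_private;   /* error path sets (true) or clears (false) the flag  */
	const char *note;
};

int main(void)
{
	const struct reserve_case cases[] = {
		{ false, true,  false,
		  "map already records the consumption; don't bump the global count" },
		{ false, false, false,
		  "no counts were adjusted at allocation; clearing is safe" },
		{ true,  true,  true,
		  "allocator already set the flag; leaving it set is the desired behavior" },
		{ true,  false, true,
		  "allocation added a map entry; set the flag so the global count matches it" },
	};

	for (unsigned i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		printf("%-7s mapping, reservation %-3s -> %s (%s)\n",
		       cases[i].shared ? "shared" : "private",
		       cases[i].had_reserve ? "yes" : "no",
		       cases[i].set_private ? "SetPagePrivate" : "ClearPagePrivate",
		       cases[i].note);
	return 0;
}
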
@@ -372,8 +408,14 @@ retry:
dst_vma = find_vma(dst_mm, dst_start);
if (!dst_vma)
goto out_unlock;
- if (!vma_is_shmem(dst_vma) && dst_vma->vm_flags & VM_SHARED)
+ /*
+ * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
+ * it will overwrite vm_ops, so vma_is_anonymous must return false.
+ */
+ if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
+ dst_vma->vm_flags & VM_SHARED))
goto out_unlock;
+
if (dst_start < dst_vma->vm_start ||
dst_start + len > dst_vma->vm_end)
goto out_unlock;
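
End to end, what this change enables is UFFDIO_COPY into a shared mapping. A hedged userspace sketch, not part of the patch: it assumes a kernel with this support, x86-64 with 2 MiB huge pages, and at least one huge page reserved via vm.nr_hugepages. It registers a MAP_SHARED hugetlb range for missing-page tracking and resolves it proactively with UFFDIO_COPY, rather than running a fault-handling thread, to keep the example short.

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#define HPAGE_SIZE (2UL * 1024 * 1024)	/* assumed huge page size */

int main(void)
{
	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC);
	if (uffd < 0) { perror("userfaultfd"); return 1; }

	struct uffdio_api api = { .api = UFFD_API };
	if (ioctl(uffd, UFFDIO_API, &api)) { perror("UFFDIO_API"); return 1; }

	/* the mapping type this patch enables: shared hugetlb */
	char *dst = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE,
			 MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (dst == MAP_FAILED) { perror("mmap hugetlb"); return 1; }

	struct uffdio_register reg = {
		.range = { .start = (unsigned long)dst, .len = HPAGE_SIZE },
		.mode = UFFDIO_REGISTER_MODE_MISSING,
	};
	if (ioctl(uffd, UFFDIO_REGISTER, &reg)) { perror("UFFDIO_REGISTER"); return 1; }

	/* ordinary anonymous memory as the copy source */
	char *src = malloc(HPAGE_SIZE);
	if (!src) return 1;
	memset(src, 0x5a, HPAGE_SIZE);

	struct uffdio_copy copy = {
		.dst = (unsigned long)dst,
		.src = (unsigned long)src,
		.len = HPAGE_SIZE,
	};
	if (ioctl(uffd, UFFDIO_COPY, &copy)) { perror("UFFDIO_COPY"); return 1; }

	printf("copied %lld bytes, dst[0]=0x%02x\n",
	       (long long)copy.copy, (unsigned char)dst[0]);
	return 0;
}
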