summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-10-19 08:39:47 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-10-19 08:39:47 -0700
commit63ae602cea637ee4a6490d940c0da5d78bd0bbe0 (patch)
tree5da1eec3756fec9d62455c498442944a7f35149b /mm
parent1a1891d762d6e64daf07b5be4817e3fbb29e3c59 (diff)
parentf307ab6dcea03f9d8e4d70508fd7d1ca57cfa7f9 (diff)
Merge branch 'gup_flag-cleanups'
Merge the gup_flags cleanups from Lorenzo Stoakes: "This patch series adjusts functions in the get_user_pages* family such that desired FOLL_* flags are passed as an argument rather than implied by flags. The purpose of this change is to make the use of FOLL_FORCE explicit so it is easier to grep for and clearer to callers that this flag is being used. The use of FOLL_FORCE is an issue as it overrides missing VM_READ/VM_WRITE flags for the VMA whose pages we are reading from/writing to, which can result in surprising behaviour. The patch series came out of the discussion around commit 38e088546522 ("mm: check VMA flags to avoid invalid PROT_NONE NUMA balancing"), which addressed a BUG_ON() being triggered when a page was faulted in with PROT_NONE set but having been overridden by FOLL_FORCE. do_numa_page() was run on the assumption the page _must_ be one marked for NUMA node migration as an actual PROT_NONE page would have been dealt with prior to this code path, however FOLL_FORCE introduced a situation where this assumption did not hold. See https://marc.info/?l=linux-mm&m=147585445805166 for the patch proposal" Additionally, there's a fix for an ancient bug related to FOLL_FORCE and FOLL_WRITE by me. [ This branch was rebased recently to add a few more acked-by's and reviewed-by's ] * gup_flag-cleanups: mm: replace access_process_vm() write parameter with gup_flags mm: replace access_remote_vm() write parameter with gup_flags mm: replace __access_remote_vm() write parameter with gup_flags mm: replace get_user_pages_remote() write/force parameters with gup_flags mm: replace get_user_pages() write/force parameters with gup_flags mm: replace get_vaddr_frames() write/force parameters with gup_flags mm: replace get_user_pages_locked() write/force parameters with gup_flags mm: replace get_user_pages_unlocked() write/force parameters with gup_flags mm: remove write/force parameters from __get_user_pages_unlocked() mm: remove write/force parameters from __get_user_pages_locked() mm: remove gup_flags FOLL_WRITE games from __get_user_pages()
Diffstat (limited to 'mm')
-rw-r--r--mm/frame_vector.c9
-rw-r--r--mm/gup.c64
-rw-r--r--mm/memory.c16
-rw-r--r--mm/mempolicy.c2
-rw-r--r--mm/nommu.c38
-rw-r--r--mm/process_vm_access.c7
-rw-r--r--mm/util.c8
7 files changed, 73 insertions, 71 deletions
diff --git a/mm/frame_vector.c b/mm/frame_vector.c
index 381bb07ed14f..db77dcb38afd 100644
--- a/mm/frame_vector.c
+++ b/mm/frame_vector.c
@@ -11,10 +11,7 @@
* get_vaddr_frames() - map virtual addresses to pfns
* @start: starting user address
* @nr_frames: number of pages / pfns from start to map
- * @write: whether pages will be written to by the caller
- * @force: whether to force write access even if user mapping is
- * readonly. See description of the same argument of
- get_user_pages().
+ * @gup_flags: flags modifying lookup behaviour
* @vec: structure which receives pages / pfns of the addresses mapped.
* It should have space for at least nr_frames entries.
*
@@ -34,7 +31,7 @@
* This function takes care of grabbing mmap_sem as necessary.
*/
int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
- bool write, bool force, struct frame_vector *vec)
+ unsigned int gup_flags, struct frame_vector *vec)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
@@ -59,7 +56,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
vec->got_ref = true;
vec->is_pfns = false;
ret = get_user_pages_locked(start, nr_frames,
- write, force, (struct page **)(vec->ptrs), &locked);
+ gup_flags, (struct page **)(vec->ptrs), &locked);
goto out;
}
diff --git a/mm/gup.c b/mm/gup.c
index 96b2b2fd0fbd..7aa113c2d373 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -60,6 +60,16 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
return -EEXIST;
}
+/*
+ * FOLL_FORCE can write to even unwritable pte's, but only
+ * after we've gone through a COW cycle and they are dirty.
+ */
+static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
+{
+ return pte_write(pte) ||
+ ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
+}
+
static struct page *follow_page_pte(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmd, unsigned int flags)
{
@@ -95,7 +105,7 @@ retry:
}
if ((flags & FOLL_NUMA) && pte_protnone(pte))
goto no_page;
- if ((flags & FOLL_WRITE) && !pte_write(pte)) {
+ if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
pte_unmap_unlock(ptep, ptl);
return NULL;
}
@@ -412,7 +422,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
* reCOWed by userspace write).
*/
if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
- *flags &= ~FOLL_WRITE;
+ *flags |= FOLL_COW;
return 0;
}
@@ -729,7 +739,6 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
- int write, int force,
struct page **pages,
struct vm_area_struct **vmas,
int *locked, bool notify_drop,
@@ -747,10 +756,6 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
if (pages)
flags |= FOLL_GET;
- if (write)
- flags |= FOLL_WRITE;
- if (force)
- flags |= FOLL_FORCE;
pages_done = 0;
lock_dropped = false;
@@ -843,12 +848,12 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
* up_read(&mm->mmap_sem);
*/
long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
- int write, int force, struct page **pages,
+ unsigned int gup_flags, struct page **pages,
int *locked)
{
return __get_user_pages_locked(current, current->mm, start, nr_pages,
- write, force, pages, NULL, locked, true,
- FOLL_TOUCH);
+ pages, NULL, locked, true,
+ gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages_locked);
@@ -864,14 +869,14 @@ EXPORT_SYMBOL(get_user_pages_locked);
*/
__always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
- int write, int force, struct page **pages,
- unsigned int gup_flags)
+ struct page **pages, unsigned int gup_flags)
{
long ret;
int locked = 1;
+
down_read(&mm->mmap_sem);
- ret = __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
- pages, NULL, &locked, false, gup_flags);
+ ret = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, NULL,
+ &locked, false, gup_flags);
if (locked)
up_read(&mm->mmap_sem);
return ret;
@@ -896,10 +901,10 @@ EXPORT_SYMBOL(__get_user_pages_unlocked);
* "force" parameter).
*/
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
- int write, int force, struct page **pages)
+ struct page **pages, unsigned int gup_flags)
{
return __get_user_pages_unlocked(current, current->mm, start, nr_pages,
- write, force, pages, FOLL_TOUCH);
+ pages, gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages_unlocked);
@@ -910,9 +915,7 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
* @mm: mm_struct of target mm
* @start: starting user address
* @nr_pages: number of pages from start to pin
- * @write: whether pages will be written to by the caller
- * @force: whether to force access even when user mapping is currently
- * protected (but never forces write access to shared mapping).
+ * @gup_flags: flags modifying lookup behaviour
* @pages: array that receives pointers to the pages pinned.
* Should be at least nr_pages long. Or NULL, if caller
* only intends to ensure the pages are faulted in.
@@ -941,9 +944,9 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
* or similar operation cannot guarantee anything stronger anyway because
* locks can't be held over the syscall boundary.
*
- * If write=0, the page must not be written to. If the page is written to,
- * set_page_dirty (or set_page_dirty_lock, as appropriate) must be called
- * after the page is finished with, and before put_page is called.
+ * If gup_flags & FOLL_WRITE == 0, the page must not be written to. If the page
+ * is written to, set_page_dirty (or set_page_dirty_lock, as appropriate) must
+ * be called after the page is finished with, and before put_page is called.
*
* get_user_pages is typically used for fewer-copy IO operations, to get a
* handle on the memory by some means other than accesses via the user virtual
@@ -960,12 +963,12 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
*/
long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
- int write, int force, struct page **pages,
+ unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas)
{
- return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
- pages, vmas, NULL, false,
- FOLL_TOUCH | FOLL_REMOTE);
+ return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
+ NULL, false,
+ gup_flags | FOLL_TOUCH | FOLL_REMOTE);
}
EXPORT_SYMBOL(get_user_pages_remote);
@@ -976,12 +979,12 @@ EXPORT_SYMBOL(get_user_pages_remote);
* obviously don't pass FOLL_REMOTE in here.
*/
long get_user_pages(unsigned long start, unsigned long nr_pages,
- int write, int force, struct page **pages,
+ unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas)
{
return __get_user_pages_locked(current, current->mm, start, nr_pages,
- write, force, pages, vmas, NULL, false,
- FOLL_TOUCH);
+ pages, vmas, NULL, false,
+ gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages);
@@ -1505,7 +1508,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
start += nr << PAGE_SHIFT;
pages += nr;
- ret = get_user_pages_unlocked(start, nr_pages - nr, write, 0, pages);
+ ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
+ write ? FOLL_WRITE : 0);
/* Have to be a bit careful with return values */
if (nr > 0) {
diff --git a/mm/memory.c b/mm/memory.c
index fc1987dfd8cc..e18c57bdc75c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3869,10 +3869,11 @@ EXPORT_SYMBOL_GPL(generic_access_phys);
* given task for page fault accounting.
*/
static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
- unsigned long addr, void *buf, int len, int write)
+ unsigned long addr, void *buf, int len, unsigned int gup_flags)
{
struct vm_area_struct *vma;
void *old_buf = buf;
+ int write = gup_flags & FOLL_WRITE;
down_read(&mm->mmap_sem);
/* ignore errors, just check how much was successfully transferred */
@@ -3882,7 +3883,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
struct page *page = NULL;
ret = get_user_pages_remote(tsk, mm, addr, 1,
- write, 1, &page, &vma);
+ gup_flags, &page, &vma);
if (ret <= 0) {
#ifndef CONFIG_HAVE_IOREMAP_PROT
break;
@@ -3934,14 +3935,14 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
* @addr: start address to access
* @buf: source or destination buffer
* @len: number of bytes to transfer
- * @write: whether the access is a write
+ * @gup_flags: flags modifying lookup behaviour
*
* The caller must hold a reference on @mm.
*/
int access_remote_vm(struct mm_struct *mm, unsigned long addr,
- void *buf, int len, int write)
+ void *buf, int len, unsigned int gup_flags)
{
- return __access_remote_vm(NULL, mm, addr, buf, len, write);
+ return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags);
}
/*
@@ -3950,7 +3951,7 @@ int access_remote_vm(struct mm_struct *mm, unsigned long addr,
* Do not walk the page table directly, use get_user_pages
*/
int access_process_vm(struct task_struct *tsk, unsigned long addr,
- void *buf, int len, int write)
+ void *buf, int len, unsigned int gup_flags)
{
struct mm_struct *mm;
int ret;
@@ -3959,7 +3960,8 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr,
if (!mm)
return 0;
- ret = __access_remote_vm(tsk, mm, addr, buf, len, write);
+ ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags);
+
mmput(mm);
return ret;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ad1c96ac313c..0b859af06b87 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -850,7 +850,7 @@ static int lookup_node(unsigned long addr)
struct page *p;
int err;
- err = get_user_pages(addr & PAGE_MASK, 1, 0, 0, &p, NULL);
+ err = get_user_pages(addr & PAGE_MASK, 1, 0, &p, NULL);
if (err >= 0) {
err = page_to_nid(p);
put_page(p);
diff --git a/mm/nommu.c b/mm/nommu.c
index 95daf81a4855..db5fd1795298 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -160,33 +160,25 @@ finish_or_fault:
* - don't permit access to VMAs that don't support it, such as I/O mappings
*/
long get_user_pages(unsigned long start, unsigned long nr_pages,
- int write, int force, struct page **pages,
+ unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas)
{
- int flags = 0;
-
- if (write)
- flags |= FOLL_WRITE;
- if (force)
- flags |= FOLL_FORCE;
-
- return __get_user_pages(current, current->mm, start, nr_pages, flags,
- pages, vmas, NULL);
+ return __get_user_pages(current, current->mm, start, nr_pages,
+ gup_flags, pages, vmas, NULL);
}
EXPORT_SYMBOL(get_user_pages);
long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
- int write, int force, struct page **pages,
+ unsigned int gup_flags, struct page **pages,
int *locked)
{
- return get_user_pages(start, nr_pages, write, force, pages, NULL);
+ return get_user_pages(start, nr_pages, gup_flags, pages, NULL);
}
EXPORT_SYMBOL(get_user_pages_locked);
long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
- int write, int force, struct page **pages,
- unsigned int gup_flags)
+ struct page **pages, unsigned int gup_flags)
{
long ret;
down_read(&mm->mmap_sem);
@@ -198,10 +190,10 @@ long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
EXPORT_SYMBOL(__get_user_pages_unlocked);
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
- int write, int force, struct page **pages)
+ struct page **pages, unsigned int gup_flags)
{
return __get_user_pages_unlocked(current, current->mm, start, nr_pages,
- write, force, pages, 0);
+ pages, gup_flags);
}
EXPORT_SYMBOL(get_user_pages_unlocked);
@@ -1817,9 +1809,10 @@ void filemap_map_pages(struct fault_env *fe,
EXPORT_SYMBOL(filemap_map_pages);
static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
- unsigned long addr, void *buf, int len, int write)
+ unsigned long addr, void *buf, int len, unsigned int gup_flags)
{
struct vm_area_struct *vma;
+ int write = gup_flags & FOLL_WRITE;
down_read(&mm->mmap_sem);
@@ -1854,21 +1847,22 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
* @addr: start address to access
* @buf: source or destination buffer
* @len: number of bytes to transfer
- * @write: whether the access is a write
+ * @gup_flags: flags modifying lookup behaviour
*
* The caller must hold a reference on @mm.
*/
int access_remote_vm(struct mm_struct *mm, unsigned long addr,
- void *buf, int len, int write)
+ void *buf, int len, unsigned int gup_flags)
{
- return __access_remote_vm(NULL, mm, addr, buf, len, write);
+ return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags);
}
/*
* Access another process' address space.
* - source/target buffer must be kernel space
*/
-int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
+int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len,
+ unsigned int gup_flags)
{
struct mm_struct *mm;
@@ -1879,7 +1873,7 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
if (!mm)
return 0;
- len = __access_remote_vm(tsk, mm, addr, buf, len, write);
+ len = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags);
mmput(mm);
return len;
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 07514d41ebcc..be8dc8d1edb9 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -88,12 +88,16 @@ static int process_vm_rw_single_vec(unsigned long addr,
ssize_t rc = 0;
unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES
/ sizeof(struct pages *);
+ unsigned int flags = FOLL_REMOTE;
/* Work out address and page range required */
if (len == 0)
return 0;
nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1;
+ if (vm_write)
+ flags |= FOLL_WRITE;
+
while (!rc && nr_pages && iov_iter_count(iter)) {
int pages = min(nr_pages, max_pages_per_loop);
size_t bytes;
@@ -104,8 +108,7 @@ static int process_vm_rw_single_vec(unsigned long addr,
* current/current->mm
*/
pages = __get_user_pages_unlocked(task, mm, pa, pages,
- vm_write, 0, process_pages,
- FOLL_REMOTE);
+ process_pages, flags);
if (pages <= 0)
return -EFAULT;
diff --git a/mm/util.c b/mm/util.c
index 662cddf914af..952cbe7ad7b7 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -283,7 +283,8 @@ EXPORT_SYMBOL_GPL(__get_user_pages_fast);
int __weak get_user_pages_fast(unsigned long start,
int nr_pages, int write, struct page **pages)
{
- return get_user_pages_unlocked(start, nr_pages, write, 0, pages);
+ return get_user_pages_unlocked(start, nr_pages, pages,
+ write ? FOLL_WRITE : 0);
}
EXPORT_SYMBOL_GPL(get_user_pages_fast);
@@ -623,7 +624,7 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen)
if (len > buflen)
len = buflen;
- res = access_process_vm(task, arg_start, buffer, len, 0);
+ res = access_process_vm(task, arg_start, buffer, len, FOLL_FORCE);
/*
* If the nul at the end of args has been overwritten, then
@@ -638,7 +639,8 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen)
if (len > buflen - res)
len = buflen - res;
res += access_process_vm(task, env_start,
- buffer+res, len, 0);
+ buffer+res, len,
+ FOLL_FORCE);
res = strnlen(buffer, res);
}
}