summaryrefslogtreecommitdiff
path: root/mm/filemap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--mm/filemap.c445
1 files changed, 105 insertions, 340 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index d1d9814f99dd..15c8413ee929 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -120,6 +120,7 @@ void __remove_from_page_cache(struct page *page)
page->mapping = NULL;
mapping->nrpages--;
__dec_zone_page_state(page, NR_FILE_PAGES);
+ BUG_ON(page_mapped(page));
}
void remove_from_page_cache(struct page *page)
@@ -866,13 +867,11 @@ void do_generic_mapping_read(struct address_space *mapping,
{
struct inode *inode = mapping->host;
unsigned long index;
- unsigned long end_index;
unsigned long offset;
unsigned long last_index;
unsigned long next_index;
unsigned long prev_index;
unsigned int prev_offset;
- loff_t isize;
struct page *cached_page;
int error;
struct file_ra_state ra = *_ra;
@@ -885,42 +884,58 @@ void do_generic_mapping_read(struct address_space *mapping,
last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
offset = *ppos & ~PAGE_CACHE_MASK;
- isize = i_size_read(inode);
- if (!isize)
- goto out;
-
- end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
for (;;) {
struct page *page;
+ unsigned long end_index;
+ loff_t isize;
unsigned long nr, ret;
+ cond_resched();
+find_page:
+ page = find_get_page(mapping, index);
+ if (!page) {
+ page_cache_sync_readahead(mapping,
+ &ra, filp,
+ index, last_index - index);
+ page = find_get_page(mapping, index);
+ if (unlikely(page == NULL))
+ goto no_cached_page;
+ }
+ if (PageReadahead(page)) {
+ page_cache_async_readahead(mapping,
+ &ra, filp, page,
+ index, last_index - index);
+ }
+ if (!PageUptodate(page))
+ goto page_not_up_to_date;
+page_ok:
+ /*
+ * i_size must be checked after we know the page is Uptodate.
+ *
+ * Checking i_size after the check allows us to calculate
+ * the correct value for "nr", which means the zero-filled
+ * part of the page is not copied back to userspace (unless
+ * another truncate extends the file - this is desired though).
+ */
+
+ isize = i_size_read(inode);
+ end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+ if (unlikely(!isize || index > end_index)) {
+ page_cache_release(page);
+ goto out;
+ }
+
/* nr is the maximum number of bytes to copy from this page */
nr = PAGE_CACHE_SIZE;
- if (index >= end_index) {
- if (index > end_index)
- goto out;
+ if (index == end_index) {
nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
if (nr <= offset) {
+ page_cache_release(page);
goto out;
}
}
nr = nr - offset;
- cond_resched();
- if (index == next_index)
- next_index = page_cache_readahead(mapping, &ra, filp,
- index, last_index - index);
-
-find_page:
- page = find_get_page(mapping, index);
- if (unlikely(page == NULL)) {
- handle_ra_miss(mapping, &ra, index);
- goto no_cached_page;
- }
- if (!PageUptodate(page))
- goto page_not_up_to_date;
-page_ok:
-
/* If users can be writing to this page using arbitrary
* virtual addresses, take care about potential aliasing
* before reading the page on the kernel side.
@@ -1006,31 +1021,6 @@ readpage:
unlock_page(page);
}
- /*
- * i_size must be checked after we have done ->readpage.
- *
- * Checking i_size after the readpage allows us to calculate
- * the correct value for "nr", which means the zero-filled
- * part of the page is not copied back to userspace (unless
- * another truncate extends the file - this is desired though).
- */
- isize = i_size_read(inode);
- end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
- if (unlikely(!isize || index > end_index)) {
- page_cache_release(page);
- goto out;
- }
-
- /* nr is the maximum number of bytes to copy from this page */
- nr = PAGE_CACHE_SIZE;
- if (index == end_index) {
- nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
- if (nr <= offset) {
- page_cache_release(page);
- goto out;
- }
- }
- nr = nr - offset;
goto page_ok;
readpage_error:
@@ -1066,6 +1056,7 @@ no_cached_page:
out:
*_ra = ra;
+ _ra->prev_index = prev_index;
*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
if (cached_page)
@@ -1218,6 +1209,8 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
retval = retval ?: desc.error;
break;
}
+ if (desc.count > 0)
+ break;
}
}
out:
@@ -1225,46 +1218,6 @@ out:
}
EXPORT_SYMBOL(generic_file_aio_read);
-int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
-{
- ssize_t written;
- unsigned long count = desc->count;
- struct file *file = desc->arg.data;
-
- if (size > count)
- size = count;
-
- written = file->f_op->sendpage(file, page, offset,
- size, &file->f_pos, size<count);
- if (written < 0) {
- desc->error = written;
- written = 0;
- }
- desc->count = count - written;
- desc->written += written;
- return written;
-}
-
-ssize_t generic_file_sendfile(struct file *in_file, loff_t *ppos,
- size_t count, read_actor_t actor, void *target)
-{
- read_descriptor_t desc;
-
- if (!count)
- return 0;
-
- desc.written = 0;
- desc.count = count;
- desc.arg.data = target;
- desc.error = 0;
-
- do_generic_file_read(in_file, ppos, &desc, actor);
- if (desc.written)
- return desc.written;
- return desc.error;
-}
-EXPORT_SYMBOL(generic_file_sendfile);
-
static ssize_t
do_readahead(struct address_space *mapping, struct file *filp,
unsigned long index, unsigned long nr)
@@ -1334,62 +1287,62 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
#define MMAP_LOTSAMISS (100)
/**
- * filemap_nopage - read in file data for page fault handling
- * @area: the applicable vm_area
- * @address: target address to read in
- * @type: returned with VM_FAULT_{MINOR,MAJOR} if not %NULL
+ * filemap_fault - read in file data for page fault handling
+ * @vma: vma in which the fault was taken
+ * @vmf: struct vm_fault containing details of the fault
*
- * filemap_nopage() is invoked via the vma operations vector for a
+ * filemap_fault() is invoked via the vma operations vector for a
* mapped memory region to read in file data during a page fault.
*
* The goto's are kind of ugly, but this streamlines the normal case of having
* it in the page cache, and handles the special cases reasonably without
* having a lot of duplicated code.
*/
-struct page *filemap_nopage(struct vm_area_struct *area,
- unsigned long address, int *type)
+int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
int error;
- struct file *file = area->vm_file;
+ struct file *file = vma->vm_file;
struct address_space *mapping = file->f_mapping;
struct file_ra_state *ra = &file->f_ra;
struct inode *inode = mapping->host;
struct page *page;
- unsigned long size, pgoff;
- int did_readaround = 0, majmin = VM_FAULT_MINOR;
-
- pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
+ unsigned long size;
+ int did_readaround = 0;
+ int ret = 0;
-retry_all:
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- if (pgoff >= size)
+ if (vmf->pgoff >= size)
goto outside_data_content;
/* If we don't want any read-ahead, don't bother */
- if (VM_RandomReadHint(area))
+ if (VM_RandomReadHint(vma))
goto no_cached_page;
/*
- * The readahead code wants to be told about each and every page
- * so it can build and shrink its windows appropriately
- *
- * For sequential accesses, we use the generic readahead logic.
- */
- if (VM_SequentialReadHint(area))
- page_cache_readahead(mapping, ra, file, pgoff, 1);
-
- /*
* Do we have something in the page cache already?
*/
retry_find:
- page = find_get_page(mapping, pgoff);
+ page = find_lock_page(mapping, vmf->pgoff);
+ /*
+ * For sequential accesses, we use the generic readahead logic.
+ */
+ if (VM_SequentialReadHint(vma)) {
+ if (!page) {
+ page_cache_sync_readahead(mapping, ra, file,
+ vmf->pgoff, 1);
+ page = find_lock_page(mapping, vmf->pgoff);
+ if (!page)
+ goto no_cached_page;
+ }
+ if (PageReadahead(page)) {
+ page_cache_async_readahead(mapping, ra, file, page,
+ vmf->pgoff, 1);
+ }
+ }
+
if (!page) {
unsigned long ra_pages;
- if (VM_SequentialReadHint(area)) {
- handle_ra_miss(mapping, ra, pgoff);
- goto no_cached_page;
- }
ra->mmap_miss++;
/*
@@ -1404,7 +1357,7 @@ retry_find:
* check did_readaround, as this is an inner loop.
*/
if (!did_readaround) {
- majmin = VM_FAULT_MAJOR;
+ ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
}
did_readaround = 1;
@@ -1412,11 +1365,11 @@ retry_find:
if (ra_pages) {
pgoff_t start = 0;
- if (pgoff > ra_pages / 2)
- start = pgoff - ra_pages / 2;
+ if (vmf->pgoff > ra_pages / 2)
+ start = vmf->pgoff - ra_pages / 2;
do_page_cache_readahead(mapping, file, start, ra_pages);
}
- page = find_get_page(mapping, pgoff);
+ page = find_lock_page(mapping, vmf->pgoff);
if (!page)
goto no_cached_page;
}
@@ -1425,35 +1378,43 @@ retry_find:
ra->mmap_hit++;
/*
- * Ok, found a page in the page cache, now we need to check
- * that it's up-to-date.
+ * We have a locked page in the page cache, now we need to check
+ * that it's up-to-date. If not, it is going to be due to an error.
*/
- if (!PageUptodate(page))
+ if (unlikely(!PageUptodate(page)))
goto page_not_uptodate;
-success:
+ /* Must recheck i_size under page lock */
+ size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ if (unlikely(vmf->pgoff >= size)) {
+ unlock_page(page);
+ page_cache_release(page);
+ goto outside_data_content;
+ }
+
/*
* Found the page and have a reference on it.
*/
mark_page_accessed(page);
- if (type)
- *type = majmin;
- return page;
+ ra->prev_index = page->index;
+ vmf->page = page;
+ return ret | VM_FAULT_LOCKED;
outside_data_content:
/*
* An external ptracer can access pages that normally aren't
* accessible..
*/
- if (area->vm_mm == current->mm)
- return NOPAGE_SIGBUS;
+ if (vma->vm_mm == current->mm)
+ return VM_FAULT_SIGBUS;
+
/* Fall through to the non-read-ahead case */
no_cached_page:
/*
* We're only likely to ever get here if MADV_RANDOM is in
* effect.
*/
- error = page_cache_read(file, pgoff);
+ error = page_cache_read(file, vmf->pgoff);
/*
* The page we want has now been added to the page cache.
@@ -1469,12 +1430,13 @@ no_cached_page:
* to schedule I/O.
*/
if (error == -ENOMEM)
- return NOPAGE_OOM;
- return NOPAGE_SIGBUS;
+ return VM_FAULT_OOM;
+ return VM_FAULT_SIGBUS;
page_not_uptodate:
+ /* IO error path */
if (!did_readaround) {
- majmin = VM_FAULT_MAJOR;
+ ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
}
@@ -1484,217 +1446,21 @@ page_not_uptodate:
* because there really aren't any performance issues here
* and we need to check for errors.
*/
- lock_page(page);
-
- /* Somebody truncated the page on us? */
- if (!page->mapping) {
- unlock_page(page);
- page_cache_release(page);
- goto retry_all;
- }
-
- /* Somebody else successfully read it in? */
- if (PageUptodate(page)) {
- unlock_page(page);
- goto success;
- }
ClearPageError(page);
error = mapping->a_ops->readpage(file, page);
- if (!error) {
- wait_on_page_locked(page);
- if (PageUptodate(page))
- goto success;
- } else if (error == AOP_TRUNCATED_PAGE) {
- page_cache_release(page);
- goto retry_find;
- }
-
- /*
- * Things didn't work out. Return zero to tell the
- * mm layer so, possibly freeing the page cache page first.
- */
- shrink_readahead_size_eio(file, ra);
page_cache_release(page);
- return NOPAGE_SIGBUS;
-}
-EXPORT_SYMBOL(filemap_nopage);
-
-static struct page * filemap_getpage(struct file *file, unsigned long pgoff,
- int nonblock)
-{
- struct address_space *mapping = file->f_mapping;
- struct page *page;
- int error;
-
- /*
- * Do we have something in the page cache already?
- */
-retry_find:
- page = find_get_page(mapping, pgoff);
- if (!page) {
- if (nonblock)
- return NULL;
- goto no_cached_page;
- }
-
- /*
- * Ok, found a page in the page cache, now we need to check
- * that it's up-to-date.
- */
- if (!PageUptodate(page)) {
- if (nonblock) {
- page_cache_release(page);
- return NULL;
- }
- goto page_not_uptodate;
- }
-
-success:
- /*
- * Found the page and have a reference on it.
- */
- mark_page_accessed(page);
- return page;
-
-no_cached_page:
- error = page_cache_read(file, pgoff);
-
- /*
- * The page we want has now been added to the page cache.
- * In the unlikely event that someone removed it in the
- * meantime, we'll just come back here and read it again.
- */
- if (error >= 0)
- goto retry_find;
-
- /*
- * An error return from page_cache_read can result if the
- * system is low on memory, or a problem occurs while trying
- * to schedule I/O.
- */
- return NULL;
-
-page_not_uptodate:
- lock_page(page);
-
- /* Did it get truncated while we waited for it? */
- if (!page->mapping) {
- unlock_page(page);
- goto err;
- }
- /* Did somebody else get it up-to-date? */
- if (PageUptodate(page)) {
- unlock_page(page);
- goto success;
- }
-
- error = mapping->a_ops->readpage(file, page);
- if (!error) {
- wait_on_page_locked(page);
- if (PageUptodate(page))
- goto success;
- } else if (error == AOP_TRUNCATED_PAGE) {
- page_cache_release(page);
- goto retry_find;
- }
-
- /*
- * Umm, take care of errors if the page isn't up-to-date.
- * Try to re-read it _once_. We do this synchronously,
- * because there really aren't any performance issues here
- * and we need to check for errors.
- */
- lock_page(page);
-
- /* Somebody truncated the page on us? */
- if (!page->mapping) {
- unlock_page(page);
- goto err;
- }
- /* Somebody else successfully read it in? */
- if (PageUptodate(page)) {
- unlock_page(page);
- goto success;
- }
-
- ClearPageError(page);
- error = mapping->a_ops->readpage(file, page);
- if (!error) {
- wait_on_page_locked(page);
- if (PageUptodate(page))
- goto success;
- } else if (error == AOP_TRUNCATED_PAGE) {
- page_cache_release(page);
+ if (!error || error == AOP_TRUNCATED_PAGE)
goto retry_find;
- }
- /*
- * Things didn't work out. Return zero to tell the
- * mm layer so, possibly freeing the page cache page first.
- */
-err:
- page_cache_release(page);
-
- return NULL;
-}
-
-int filemap_populate(struct vm_area_struct *vma, unsigned long addr,
- unsigned long len, pgprot_t prot, unsigned long pgoff,
- int nonblock)
-{
- struct file *file = vma->vm_file;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- unsigned long size;
- struct mm_struct *mm = vma->vm_mm;
- struct page *page;
- int err;
-
- if (!nonblock)
- force_page_cache_readahead(mapping, vma->vm_file,
- pgoff, len >> PAGE_CACHE_SHIFT);
-
-repeat:
- size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- if (pgoff + (len >> PAGE_CACHE_SHIFT) > size)
- return -EINVAL;
-
- page = filemap_getpage(file, pgoff, nonblock);
-
- /* XXX: This is wrong, a filesystem I/O error may have happened. Fix that as
- * done in shmem_populate calling shmem_getpage */
- if (!page && !nonblock)
- return -ENOMEM;
-
- if (page) {
- err = install_page(mm, vma, addr, page, prot);
- if (err) {
- page_cache_release(page);
- return err;
- }
- } else if (vma->vm_flags & VM_NONLINEAR) {
- /* No page was found just because we can't read it in now (being
- * here implies nonblock != 0), but the page may exist, so set
- * the PTE to fault it in later. */
- err = install_file_pte(mm, vma, addr, pgoff, prot);
- if (err)
- return err;
- }
-
- len -= PAGE_SIZE;
- addr += PAGE_SIZE;
- pgoff++;
- if (len)
- goto repeat;
-
- return 0;
+ /* Things didn't work out. Return zero to tell the mm layer so. */
+ shrink_readahead_size_eio(file, ra);
+ return VM_FAULT_SIGBUS;
}
-EXPORT_SYMBOL(filemap_populate);
+EXPORT_SYMBOL(filemap_fault);
struct vm_operations_struct generic_file_vm_ops = {
- .nopage = filemap_nopage,
- .populate = filemap_populate,
+ .fault = filemap_fault,
};
/* This is used for a general mmap of a disk file */
@@ -1707,6 +1473,7 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
return -ENOEXEC;
file_accessed(file);
vma->vm_ops = &generic_file_vm_ops;
+ vma->vm_flags |= VM_CAN_NONLINEAR;
return 0;
}
@@ -1984,7 +1751,6 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
if (unlikely(*pos + *count > MAX_NON_LFS &&
!(file->f_flags & O_LARGEFILE))) {
if (*pos >= MAX_NON_LFS) {
- send_sig(SIGXFSZ, current, 0);
return -EFBIG;
}
if (*count > MAX_NON_LFS - (unsigned long)*pos) {
@@ -2002,7 +1768,6 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
if (likely(!isblk)) {
if (unlikely(*pos >= inode->i_sb->s_maxbytes)) {
if (*count || *pos > inode->i_sb->s_maxbytes) {
- send_sig(SIGXFSZ, current, 0);
return -EFBIG;
}
/* zero-length writes at ->s_maxbytes are OK */