summaryrefslogtreecommitdiff
path: root/mm/filemap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--mm/filemap.c175
1 files changed, 99 insertions, 76 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 379ff0bcbf6e..22396713feb9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -121,7 +121,6 @@ void __remove_from_page_cache(struct page *page)
mapping->nrpages--;
__dec_zone_page_state(page, NR_FILE_PAGES);
BUG_ON(page_mapped(page));
- mem_cgroup_uncharge_cache_page(page);
/*
* Some filesystems seem to re-dirty the page even after
@@ -145,6 +144,7 @@ void remove_from_page_cache(struct page *page)
spin_lock_irq(&mapping->tree_lock);
__remove_from_page_cache(page);
spin_unlock_irq(&mapping->tree_lock);
+ mem_cgroup_uncharge_cache_page(page);
}
static int sync_page(void *word)
@@ -476,13 +476,13 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
if (likely(!error)) {
mapping->nrpages++;
__inc_zone_page_state(page, NR_FILE_PAGES);
+ spin_unlock_irq(&mapping->tree_lock);
} else {
page->mapping = NULL;
+ spin_unlock_irq(&mapping->tree_lock);
mem_cgroup_uncharge_cache_page(page);
page_cache_release(page);
}
-
- spin_unlock_irq(&mapping->tree_lock);
radix_tree_preload_end();
} else
mem_cgroup_uncharge_cache_page(page);
@@ -521,7 +521,7 @@ struct page *__page_cache_alloc(gfp_t gfp)
{
if (cpuset_do_page_mem_spread()) {
int n = cpuset_mem_spread_node();
- return alloc_pages_node(n, gfp, 0);
+ return alloc_pages_exact_node(n, gfp, 0);
}
return alloc_pages(gfp, 0);
}
@@ -1004,9 +1004,6 @@ EXPORT_SYMBOL(grab_cache_page_nowait);
static void shrink_readahead_size_eio(struct file *filp,
struct file_ra_state *ra)
{
- if (!ra->ra_pages)
- return;
-
ra->ra_pages /= 4;
}
@@ -1390,8 +1387,7 @@ do_readahead(struct address_space *mapping, struct file *filp,
if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage)
return -EINVAL;
- force_page_cache_readahead(mapping, filp, index,
- max_sane_readahead(nr));
+ force_page_cache_readahead(mapping, filp, index, nr);
return 0;
}
@@ -1457,6 +1453,73 @@ static int page_cache_read(struct file *file, pgoff_t offset)
#define MMAP_LOTSAMISS (100)
+/*
+ * Synchronous readahead happens when we don't even find
+ * a page in the page cache at all.
+ */
+static void do_sync_mmap_readahead(struct vm_area_struct *vma,
+ struct file_ra_state *ra,
+ struct file *file,
+ pgoff_t offset)
+{
+ unsigned long ra_pages;
+ struct address_space *mapping = file->f_mapping;
+
+ /* If we don't want any read-ahead, don't bother */
+ if (VM_RandomReadHint(vma))
+ return;
+
+ if (VM_SequentialReadHint(vma) ||
+ offset - 1 == (ra->prev_pos >> PAGE_CACHE_SHIFT)) {
+ page_cache_sync_readahead(mapping, ra, file, offset,
+ ra->ra_pages);
+ return;
+ }
+
+ if (ra->mmap_miss < INT_MAX)
+ ra->mmap_miss++;
+
+ /*
+ * Do we miss much more than hit in this file? If so,
+ * stop bothering with read-ahead. It will only hurt.
+ */
+ if (ra->mmap_miss > MMAP_LOTSAMISS)
+ return;
+
+ /*
+ * mmap read-around
+ */
+ ra_pages = max_sane_readahead(ra->ra_pages);
+ if (ra_pages) {
+ ra->start = max_t(long, 0, offset - ra_pages/2);
+ ra->size = ra_pages;
+ ra->async_size = 0;
+ ra_submit(ra, mapping, file);
+ }
+}
+
+/*
+ * Asynchronous readahead happens when we find the page and PG_readahead,
+ * so we want to possibly extend the readahead further..
+ */
+static void do_async_mmap_readahead(struct vm_area_struct *vma,
+ struct file_ra_state *ra,
+ struct file *file,
+ struct page *page,
+ pgoff_t offset)
+{
+ struct address_space *mapping = file->f_mapping;
+
+ /* If we don't want any read-ahead, don't bother */
+ if (VM_RandomReadHint(vma))
+ return;
+ if (ra->mmap_miss > 0)
+ ra->mmap_miss--;
+ if (PageReadahead(page))
+ page_cache_async_readahead(mapping, ra, file,
+ page, offset, ra->ra_pages);
+}
+
/**
* filemap_fault - read in file data for page fault handling
* @vma: vma in which the fault was taken
@@ -1476,78 +1539,44 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct address_space *mapping = file->f_mapping;
struct file_ra_state *ra = &file->f_ra;
struct inode *inode = mapping->host;
+ pgoff_t offset = vmf->pgoff;
struct page *page;
pgoff_t size;
- int did_readaround = 0;
int ret = 0;
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- if (vmf->pgoff >= size)
+ if (offset >= size)
return VM_FAULT_SIGBUS;
- /* If we don't want any read-ahead, don't bother */
- if (VM_RandomReadHint(vma))
- goto no_cached_page;
-
/*
* Do we have something in the page cache already?
*/
-retry_find:
- page = find_lock_page(mapping, vmf->pgoff);
- /*
- * For sequential accesses, we use the generic readahead logic.
- */
- if (VM_SequentialReadHint(vma)) {
- if (!page) {
- page_cache_sync_readahead(mapping, ra, file,
- vmf->pgoff, 1);
- page = find_lock_page(mapping, vmf->pgoff);
- if (!page)
- goto no_cached_page;
- }
- if (PageReadahead(page)) {
- page_cache_async_readahead(mapping, ra, file, page,
- vmf->pgoff, 1);
- }
- }
-
- if (!page) {
- unsigned long ra_pages;
-
- ra->mmap_miss++;
-
+ page = find_get_page(mapping, offset);
+ if (likely(page)) {
/*
- * Do we miss much more than hit in this file? If so,
- * stop bothering with read-ahead. It will only hurt.
+ * We found the page, so try async readahead before
+ * waiting for the lock.
*/
- if (ra->mmap_miss > MMAP_LOTSAMISS)
- goto no_cached_page;
+ do_async_mmap_readahead(vma, ra, file, page, offset);
+ lock_page(page);
- /*
- * To keep the pgmajfault counter straight, we need to
- * check did_readaround, as this is an inner loop.
- */
- if (!did_readaround) {
- ret = VM_FAULT_MAJOR;
- count_vm_event(PGMAJFAULT);
- }
- did_readaround = 1;
- ra_pages = max_sane_readahead(file->f_ra.ra_pages);
- if (ra_pages) {
- pgoff_t start = 0;
-
- if (vmf->pgoff > ra_pages / 2)
- start = vmf->pgoff - ra_pages / 2;
- do_page_cache_readahead(mapping, file, start, ra_pages);
+ /* Did it get truncated? */
+ if (unlikely(page->mapping != mapping)) {
+ unlock_page(page);
+ put_page(page);
+ goto no_cached_page;
}
- page = find_lock_page(mapping, vmf->pgoff);
+ } else {
+ /* No page in the page cache at all */
+ do_sync_mmap_readahead(vma, ra, file, offset);
+ count_vm_event(PGMAJFAULT);
+ ret = VM_FAULT_MAJOR;
+retry_find:
+ page = find_lock_page(mapping, offset);
if (!page)
goto no_cached_page;
}
- if (!did_readaround)
- ra->mmap_miss--;
-
/*
* We have a locked page in the page cache, now we need to check
* that it's up-to-date. If not, it is going to be due to an error.
@@ -1555,18 +1584,18 @@ retry_find:
if (unlikely(!PageUptodate(page)))
goto page_not_uptodate;
- /* Must recheck i_size under page lock */
+ /*
+ * Found the page and have a reference on it.
+ * We must recheck i_size under page lock.
+ */
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- if (unlikely(vmf->pgoff >= size)) {
+ if (unlikely(offset >= size)) {
unlock_page(page);
page_cache_release(page);
return VM_FAULT_SIGBUS;
}
- /*
- * Found the page and have a reference on it.
- */
- ra->prev_pos = (loff_t)page->index << PAGE_CACHE_SHIFT;
+ ra->prev_pos = (loff_t)offset << PAGE_CACHE_SHIFT;
vmf->page = page;
return ret | VM_FAULT_LOCKED;
@@ -1575,7 +1604,7 @@ no_cached_page:
* We're only likely to ever get here if MADV_RANDOM is in
* effect.
*/
- error = page_cache_read(file, vmf->pgoff);
+ error = page_cache_read(file, offset);
/*
* The page we want has now been added to the page cache.
@@ -1595,12 +1624,6 @@ no_cached_page:
return VM_FAULT_SIGBUS;
page_not_uptodate:
- /* IO error path */
- if (!did_readaround) {
- ret = VM_FAULT_MAJOR;
- count_vm_event(PGMAJFAULT);
- }
-
/*
* Umm, take care of errors if the page isn't up-to-date.
* Try to re-read it _once_. We do this synchronously,