summaryrefslogtreecommitdiff
path: root/fs/btrfs/extent_io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--fs/btrfs/extent_io.c180
1 files changed, 89 insertions, 91 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c09a40db53db..ff43802a7c88 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -13,13 +13,13 @@
#include <linux/cleancache.h>
#include "extent_io.h"
#include "extent_map.h"
-#include "compat.h"
#include "ctree.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "check-integrity.h"
#include "locking.h"
#include "rcu-string.h"
+#include "backref.h"
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
@@ -145,8 +145,16 @@ int __init extent_io_init(void)
offsetof(struct btrfs_io_bio, bio));
if (!btrfs_bioset)
goto free_buffer_cache;
+
+ if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
+ goto free_bioset;
+
return 0;
+free_bioset:
+ bioset_free(btrfs_bioset);
+ btrfs_bioset = NULL;
+
free_buffer_cache:
kmem_cache_destroy(extent_buffer_cache);
extent_buffer_cache = NULL;
@@ -1482,10 +1490,8 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
cur_start = state->end + 1;
node = rb_next(node);
total_bytes += state->end - state->start + 1;
- if (total_bytes >= max_bytes) {
- *end = *start + max_bytes - 1;
+ if (total_bytes >= max_bytes)
break;
- }
if (!node)
break;
}
@@ -1591,11 +1597,10 @@ done:
*
* 1 is returned if we find something, 0 if nothing was in the tree
*/
-static noinline u64 find_lock_delalloc_range(struct inode *inode,
- struct extent_io_tree *tree,
- struct page *locked_page,
- u64 *start, u64 *end,
- u64 max_bytes)
+STATIC u64 find_lock_delalloc_range(struct inode *inode,
+ struct extent_io_tree *tree,
+ struct page *locked_page, u64 *start,
+ u64 *end, u64 max_bytes)
{
u64 delalloc_start;
u64 delalloc_end;
@@ -1614,7 +1619,7 @@ again:
*start = delalloc_start;
*end = delalloc_end;
free_extent_state(cached_state);
- return found;
+ return 0;
}
/*
@@ -1627,10 +1632,9 @@ again:
/*
* make sure to limit the number of pages we try to lock down
- * if we're looping.
*/
- if (delalloc_end + 1 - delalloc_start > max_bytes && loops)
- delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1;
+ if (delalloc_end + 1 - delalloc_start > max_bytes)
+ delalloc_end = delalloc_start + max_bytes - 1;
/* step two, lock all the pages after the page that has start */
ret = lock_delalloc_pages(inode, locked_page,
@@ -1641,8 +1645,7 @@ again:
*/
free_extent_state(cached_state);
if (!loops) {
- unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
- max_bytes = PAGE_CACHE_SIZE - offset;
+ max_bytes = PAGE_CACHE_SIZE;
loops = 1;
goto again;
} else {
@@ -1736,10 +1739,8 @@ u64 count_range_bits(struct extent_io_tree *tree,
u64 last = 0;
int found = 0;
- if (search_end <= cur_start) {
- WARN_ON(1);
+ if (WARN_ON(search_end <= cur_start))
return 0;
- }
spin_lock(&tree->lock);
if (cur_start == 0 && bits == EXTENT_DIRTY) {
@@ -1951,11 +1952,6 @@ static int free_io_failure(struct inode *inode, struct io_failure_record *rec,
return err;
}
-static void repair_io_failure_callback(struct bio *bio, int err)
-{
- complete(bio->bi_private);
-}
-
/*
* this bypasses the standard btrfs submit functions deliberately, as
* the standard behavior is to write all copies in a raid setup. here we only
@@ -1972,13 +1968,13 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
{
struct bio *bio;
struct btrfs_device *dev;
- DECLARE_COMPLETION_ONSTACK(compl);
u64 map_length = 0;
u64 sector;
struct btrfs_bio *bbio = NULL;
struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
int ret;
+ ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
BUG_ON(!mirror_num);
/* we can't repair anything in raid56 yet */
@@ -1988,8 +1984,6 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
- bio->bi_private = &compl;
- bio->bi_end_io = repair_io_failure_callback;
bio->bi_size = 0;
map_length = length;
@@ -2010,10 +2004,8 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
}
bio->bi_bdev = dev->bdev;
bio_add_page(bio, page, length, start - page_offset(page));
- btrfsic_submit_bio(WRITE_SYNC, bio);
- wait_for_completion(&compl);
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+ if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
/* try to remap that extent elsewhere? */
bio_put(bio);
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
@@ -2035,6 +2027,9 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
int ret = 0;
+ if (root->fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
for (i = 0; i < num_pages; i++) {
struct page *p = extent_buffer_page(eb, i);
ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE,
@@ -2056,12 +2051,12 @@ static int clean_io_failure(u64 start, struct page *page)
u64 private;
u64 private_failure;
struct io_failure_record *failrec;
- struct btrfs_fs_info *fs_info;
+ struct inode *inode = page->mapping->host;
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct extent_state *state;
int num_copies;
int did_repair = 0;
int ret;
- struct inode *inode = page->mapping->host;
private = 0;
ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
@@ -2084,6 +2079,8 @@ static int clean_io_failure(u64 start, struct page *page)
did_repair = 1;
goto out;
}
+ if (fs_info->sb->s_flags & MS_RDONLY)
+ goto out;
spin_lock(&BTRFS_I(inode)->io_tree.lock);
state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
@@ -2093,7 +2090,6 @@ static int clean_io_failure(u64 start, struct page *page)
if (state && state->start <= failrec->start &&
state->end >= failrec->start + failrec->len - 1) {
- fs_info = BTRFS_I(inode)->root->fs_info;
num_copies = btrfs_num_copies(fs_info, failrec->logical,
failrec->len);
if (num_copies > 1) {
@@ -3565,9 +3561,8 @@ retry:
* but no sense in crashing the users box for something
* we can survive anyway.
*/
- if (!eb) {
+ if (WARN_ON(!eb)) {
spin_unlock(&mapping->private_lock);
- WARN_ON(1);
continue;
}
@@ -4034,7 +4029,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
if (offset >= last)
return NULL;
- while(1) {
+ while (1) {
len = last - offset;
if (len == 0)
break;
@@ -4058,6 +4053,19 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
return NULL;
}
+static noinline int count_ext_ref(u64 inum, u64 offset, u64 root_id, void *ctx)
+{
+ unsigned long cnt = *((unsigned long *)ctx);
+
+ cnt++;
+ *((unsigned long *)ctx) = cnt;
+
+ /* Now we're sure that the extent is shared. */
+ if (cnt > 1)
+ return 1;
+ return 0;
+}
+
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent)
{
@@ -4124,7 +4132,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
last = found_key.offset;
last_for_get_extent = last + 1;
}
- btrfs_free_path(path);
+ btrfs_release_path(path);
/*
* we might have some extents allocated but more delalloc past those
@@ -4194,7 +4202,24 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
flags |= (FIEMAP_EXTENT_DELALLOC |
FIEMAP_EXTENT_UNKNOWN);
} else {
+ unsigned long ref_cnt = 0;
+
disko = em->block_start + offset_in_extent;
+
+ /*
+ * As btrfs supports shared space, this information
+ * can be exported to userspace tools via
+ * flag FIEMAP_EXTENT_SHARED.
+ */
+ ret = iterate_inodes_from_logical(
+ em->block_start,
+ BTRFS_I(inode)->root->fs_info,
+ path, count_ext_ref, &ref_cnt);
+ if (ret < 0 && ret != -ENOENT)
+ goto out_free;
+
+ if (ref_cnt > 1)
+ flags |= FIEMAP_EXTENT_SHARED;
}
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
flags |= FIEMAP_EXTENT_ENCODED;
@@ -4226,6 +4251,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
out_free:
free_extent_map(em);
out:
+ btrfs_free_path(path);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
&cached_state, GFP_NOFS);
return ret;
@@ -4451,6 +4477,23 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb)
}
}
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+ u64 start)
+{
+ struct extent_buffer *eb;
+
+ rcu_read_lock();
+ eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+ if (eb && atomic_inc_not_zero(&eb->refs)) {
+ rcu_read_unlock();
+ mark_extent_buffer_accessed(eb);
+ return eb;
+ }
+ rcu_read_unlock();
+
+ return NULL;
+}
+
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
u64 start, unsigned long len)
{
@@ -4464,14 +4507,10 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
int uptodate = 1;
int ret;
- rcu_read_lock();
- eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
- if (eb && atomic_inc_not_zero(&eb->refs)) {
- rcu_read_unlock();
- mark_extent_buffer_accessed(eb);
+
+ eb = find_extent_buffer(tree, start);
+ if (eb)
return eb;
- }
- rcu_read_unlock();
eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
if (!eb)
@@ -4530,24 +4569,17 @@ again:
spin_lock(&tree->buffer_lock);
ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
+ spin_unlock(&tree->buffer_lock);
+ radix_tree_preload_end();
if (ret == -EEXIST) {
- exists = radix_tree_lookup(&tree->buffer,
- start >> PAGE_CACHE_SHIFT);
- if (!atomic_inc_not_zero(&exists->refs)) {
- spin_unlock(&tree->buffer_lock);
- radix_tree_preload_end();
- exists = NULL;
+ exists = find_extent_buffer(tree, start);
+ if (exists)
+ goto free_eb;
+ else
goto again;
- }
- spin_unlock(&tree->buffer_lock);
- radix_tree_preload_end();
- mark_extent_buffer_accessed(exists);
- goto free_eb;
}
/* add one reference for the tree */
check_buffer_tree_ref(eb);
- spin_unlock(&tree->buffer_lock);
- radix_tree_preload_end();
/*
* there is a race where release page may have
@@ -4578,23 +4610,6 @@ free_eb:
return exists;
}
-struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
- u64 start, unsigned long len)
-{
- struct extent_buffer *eb;
-
- rcu_read_lock();
- eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
- if (eb && atomic_inc_not_zero(&eb->refs)) {
- rcu_read_unlock();
- mark_extent_buffer_accessed(eb);
- return eb;
- }
- rcu_read_unlock();
-
- return NULL;
-}
-
static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
{
struct extent_buffer *eb =
@@ -5058,23 +5073,6 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
}
}
-static void move_pages(struct page *dst_page, struct page *src_page,
- unsigned long dst_off, unsigned long src_off,
- unsigned long len)
-{
- char *dst_kaddr = page_address(dst_page);
- if (dst_page == src_page) {
- memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
- } else {
- char *src_kaddr = page_address(src_page);
- char *p = dst_kaddr + dst_off + len;
- char *s = src_kaddr + src_off + len;
-
- while (len--)
- *--p = *--s;
- }
-}
-
static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
{
unsigned long distance = (src > dst) ? src - dst : dst - src;
@@ -5185,7 +5183,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
cur = min_t(unsigned long, len, src_off_in_page + 1);
cur = min(cur, dst_off_in_page + 1);
- move_pages(extent_buffer_page(dst, dst_i),
+ copy_pages(extent_buffer_page(dst, dst_i),
extent_buffer_page(dst, src_i),
dst_off_in_page - cur + 1,
src_off_in_page - cur + 1, cur);