From a6fa6fae40ec336c7df6155255ae64ebef43a8bc Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 25 Oct 2010 15:12:26 +0800 Subject: btrfs: Add lzo compression support Lzo is a much faster compression algorithm than gzib, so would allow more users to enable transparent compression, and some users can choose from compression ratio and speed for different applications Usage: # mount -t btrfs -o compress[=] dev /mnt or # mount -t btrfs -o compress-force[=] dev /mnt "-o compress" without argument is still allowed for compatability. Compatibility: If we mount a filesystem with lzo compression, it will not be able be mounted in old kernels. One reason is, otherwise btrfs will directly dump compressed data, which sits in inline extent, to user. Performance: The test copied a linux source tarball (~400M) from an ext4 partition to the btrfs partition, and then extracted it. (time in second) lzo zlib nocompress copy: 10.6 21.7 14.9 extract: 70.1 94.4 66.6 (data size in MB) lzo zlib nocompress copy: 185.87 108.69 394.49 extract: 193.80 132.36 381.21 Changelog: v1 -> v2: - Select LZO_COMPRESS and LZO_DECOMPRESS in btrfs Kconfig. - Add incompability flag. - Fix error handling in compress code. Signed-off-by: Li Zefan --- fs/btrfs/disk-io.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a5d2249e6da5..f88eb2ce7919 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1744,10 +1744,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, } features = btrfs_super_incompat_flags(disk_super); - if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { - features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; - btrfs_set_super_incompat_flags(disk_super, features); - } + features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; + if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) + features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; + btrfs_set_super_incompat_flags(disk_super, features); features = btrfs_super_compat_ro_flags(disk_super) & ~BTRFS_FEATURE_COMPAT_RO_SUPP; -- cgit v1.2.3 From 20b450773d17e325190c158e10bfdb25dc21d2d6 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Sat, 8 Jan 2011 10:09:13 +0000 Subject: btrfs: mount failure return value fix I happened to pass swap partition as root partition in cmdline, then kernel panic and tell me about "Cannot open root device". It is not correct, in fact it is a fs type mismatch instead of 'no device'. Eventually I found btrfs mounting failed with -EIO, it should be -EINVAL. The logic in init/do_mounts.c: for (p = fs_names; *p; p += strlen(p)+1) { int err = do_mount_root(name, p, flags, root_mount_data); switch (err) { case 0: goto out; case -EACCES: flags |= MS_RDONLY; goto retry; case -EINVAL: continue; } print "Cannot open root device" panic } SO fs type after btrfs will have no chance to mount Here fix the return value as -EINVAL Signed-off-by: Dave Young Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f88eb2ce7919..f9efb68fc2e3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1713,8 +1713,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info, BTRFS_ROOT_TREE_OBJECTID); bh = btrfs_read_dev_super(fs_devices->latest_bdev); - if (!bh) + if (!bh) { + err = -EINVAL; goto fail_iput; + } memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); memcpy(&fs_info->super_for_commit, &fs_info->super_copy, -- cgit v1.2.3 From 91ca338d776e0cefb255bf2979b6448febd880f5 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Wed, 5 Jan 2011 02:32:22 +0000 Subject: btrfs: check NULL or not Should check if functions returns NULL or not. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f9efb68fc2e3..a0c37b2ee9ed 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -353,6 +353,10 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) WARN_ON(len == 0); eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); + if (eb == NULL) { + WARN_ON(1); + goto out; + } ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, btrfs_header_generation(eb)); BUG_ON(ret); @@ -427,6 +431,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, WARN_ON(len == 0); eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); + if (eb == NULL) { + ret = -EIO; + goto out; + } found_start = btrfs_header_bytenr(eb); if (found_start != start) { -- cgit v1.2.3 From 5e540f7715b8cd83b8e60beaaa525b125cc122de Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Mon, 27 Dec 2010 06:53:10 +0000 Subject: btrfs: Fix memory leak in btrfs_read_fs_root_no_radix() In btrfs_read_fs_root_no_radix(), 'root' is not freed if btrfs_search_slot() returns error. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a0c37b2ee9ed..9b1dd4138072 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1153,6 +1153,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, } btrfs_free_path(path); if (ret) { + kfree(root); if (ret > 0) ret = -ENOENT; return ERR_PTR(ret); -- cgit v1.2.3 From acce952b0263825da32cf10489413dec78053347 Mon Sep 17 00:00:00 2001 From: liubo Date: Thu, 6 Jan 2011 19:30:25 +0800 Subject: Btrfs: forced readonly mounts on errors This patch comes from "Forced readonly mounts on errors" ideas. As we know, this is the first step in being more fault tolerant of disk corruptions instead of just using BUG() statements. The major content: - add a framework for generating errors that should result in filesystems going readonly. - keep FS state in disk super block. - make sure that all of resource will be freed and released at umount time. - make sure that fter FS is forced readonly on error, there will be no more disk change before FS is corrected. For this, we should stop write operation. After this patch is applied, the conversion from BUG() to such a framework can happen incrementally. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 391 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 389 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9b1dd4138072..1a3af9e8e0c4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -44,6 +44,20 @@ static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); static void free_fs_root(struct btrfs_root *root); +static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, + int read_only); +static int btrfs_destroy_ordered_operations(struct btrfs_root *root); +static int btrfs_destroy_ordered_extents(struct btrfs_root *root); +static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, + struct btrfs_root *root); +static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); +static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root); +static int btrfs_destroy_marked_extents(struct btrfs_root *root, + struct extent_io_tree *dirty_pages, + int mark); +static int btrfs_destroy_pinned_extent(struct btrfs_root *root, + struct extent_io_tree *pinned_extents); +static int btrfs_cleanup_transaction(struct btrfs_root *root); /* * end_io_wq structs are used to do processing in task context when an IO is @@ -1738,6 +1752,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!btrfs_super_root(disk_super)) goto fail_iput; + /* check FS state, whether FS is broken. */ + fs_info->fs_state |= btrfs_super_flags(disk_super); + + btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); + ret = btrfs_parse_options(tree_root, options); if (ret) { err = ret; @@ -1968,7 +1987,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_set_opt(fs_info->mount_opt, SSD); } - if (btrfs_super_log_root(disk_super) != 0) { + /* do not make disk changes in broken FS */ + if (btrfs_super_log_root(disk_super) != 0 && + !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { u64 bytenr = btrfs_super_log_root(disk_super); if (fs_devices->rw_devices == 0) { @@ -2464,8 +2485,28 @@ int close_ctree(struct btrfs_root *root) smp_mb(); btrfs_put_block_group_cache(fs_info); + + /* + * Here come 2 situations when btrfs is broken to flip readonly: + * + * 1. when btrfs flips readonly somewhere else before + * btrfs_commit_super, sb->s_flags has MS_RDONLY flag, + * and btrfs will skip to write sb directly to keep + * ERROR state on disk. + * + * 2. when btrfs flips readonly just in btrfs_commit_super, + * and in such case, btrfs cannnot write sb via btrfs_commit_super, + * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag, + * btrfs will cleanup all FS resources first and write sb then. + */ if (!(fs_info->sb->s_flags & MS_RDONLY)) { - ret = btrfs_commit_super(root); + ret = btrfs_commit_super(root); + if (ret) + printk(KERN_ERR "btrfs: commit super ret %d\n", ret); + } + + if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + ret = btrfs_error_commit_super(root); if (ret) printk(KERN_ERR "btrfs: commit super ret %d\n", ret); } @@ -2641,6 +2682,352 @@ out: return 0; } +static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, + int read_only) +{ + if (read_only) + return; + + if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) + printk(KERN_WARNING "warning: mount fs with errors, " + "running btrfsck is recommended\n"); +} + +int btrfs_error_commit_super(struct btrfs_root *root) +{ + int ret; + + mutex_lock(&root->fs_info->cleaner_mutex); + btrfs_run_delayed_iputs(root); + mutex_unlock(&root->fs_info->cleaner_mutex); + + down_write(&root->fs_info->cleanup_work_sem); + up_write(&root->fs_info->cleanup_work_sem); + + /* cleanup FS via transaction */ + btrfs_cleanup_transaction(root); + + ret = write_ctree_super(NULL, root, 0); + + return ret; +} + +static int btrfs_destroy_ordered_operations(struct btrfs_root *root) +{ + struct btrfs_inode *btrfs_inode; + struct list_head splice; + + INIT_LIST_HEAD(&splice); + + mutex_lock(&root->fs_info->ordered_operations_mutex); + spin_lock(&root->fs_info->ordered_extent_lock); + + list_splice_init(&root->fs_info->ordered_operations, &splice); + while (!list_empty(&splice)) { + btrfs_inode = list_entry(splice.next, struct btrfs_inode, + ordered_operations); + + list_del_init(&btrfs_inode->ordered_operations); + + btrfs_invalidate_inodes(btrfs_inode->root); + } + + spin_unlock(&root->fs_info->ordered_extent_lock); + mutex_unlock(&root->fs_info->ordered_operations_mutex); + + return 0; +} + +static int btrfs_destroy_ordered_extents(struct btrfs_root *root) +{ + struct list_head splice; + struct btrfs_ordered_extent *ordered; + struct inode *inode; + + INIT_LIST_HEAD(&splice); + + spin_lock(&root->fs_info->ordered_extent_lock); + + list_splice_init(&root->fs_info->ordered_extents, &splice); + while (!list_empty(&splice)) { + ordered = list_entry(splice.next, struct btrfs_ordered_extent, + root_extent_list); + + list_del_init(&ordered->root_extent_list); + atomic_inc(&ordered->refs); + + /* the inode may be getting freed (in sys_unlink path). */ + inode = igrab(ordered->inode); + + spin_unlock(&root->fs_info->ordered_extent_lock); + if (inode) + iput(inode); + + atomic_set(&ordered->refs, 1); + btrfs_put_ordered_extent(ordered); + + spin_lock(&root->fs_info->ordered_extent_lock); + } + + spin_unlock(&root->fs_info->ordered_extent_lock); + + return 0; +} + +static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, + struct btrfs_root *root) +{ + struct rb_node *node; + struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_delayed_ref_node *ref; + int ret = 0; + + delayed_refs = &trans->delayed_refs; + + spin_lock(&delayed_refs->lock); + if (delayed_refs->num_entries == 0) { + printk(KERN_INFO "delayed_refs has NO entry\n"); + return ret; + } + + node = rb_first(&delayed_refs->root); + while (node) { + ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); + node = rb_next(node); + + ref->in_tree = 0; + rb_erase(&ref->rb_node, &delayed_refs->root); + delayed_refs->num_entries--; + + atomic_set(&ref->refs, 1); + if (btrfs_delayed_ref_is_head(ref)) { + struct btrfs_delayed_ref_head *head; + + head = btrfs_delayed_node_to_head(ref); + mutex_lock(&head->mutex); + kfree(head->extent_op); + delayed_refs->num_heads--; + if (list_empty(&head->cluster)) + delayed_refs->num_heads_ready--; + list_del_init(&head->cluster); + mutex_unlock(&head->mutex); + } + + spin_unlock(&delayed_refs->lock); + btrfs_put_delayed_ref(ref); + + cond_resched(); + spin_lock(&delayed_refs->lock); + } + + spin_unlock(&delayed_refs->lock); + + return ret; +} + +static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) +{ + struct btrfs_pending_snapshot *snapshot; + struct list_head splice; + + INIT_LIST_HEAD(&splice); + + list_splice_init(&t->pending_snapshots, &splice); + + while (!list_empty(&splice)) { + snapshot = list_entry(splice.next, + struct btrfs_pending_snapshot, + list); + + list_del_init(&snapshot->list); + + kfree(snapshot); + } + + return 0; +} + +static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) +{ + struct btrfs_inode *btrfs_inode; + struct list_head splice; + + INIT_LIST_HEAD(&splice); + + list_splice_init(&root->fs_info->delalloc_inodes, &splice); + + spin_lock(&root->fs_info->delalloc_lock); + + while (!list_empty(&splice)) { + btrfs_inode = list_entry(splice.next, struct btrfs_inode, + delalloc_inodes); + + list_del_init(&btrfs_inode->delalloc_inodes); + + btrfs_invalidate_inodes(btrfs_inode->root); + } + + spin_unlock(&root->fs_info->delalloc_lock); + + return 0; +} + +static int btrfs_destroy_marked_extents(struct btrfs_root *root, + struct extent_io_tree *dirty_pages, + int mark) +{ + int ret; + struct page *page; + struct inode *btree_inode = root->fs_info->btree_inode; + struct extent_buffer *eb; + u64 start = 0; + u64 end; + u64 offset; + unsigned long index; + + while (1) { + ret = find_first_extent_bit(dirty_pages, start, &start, &end, + mark); + if (ret) + break; + + clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); + while (start <= end) { + index = start >> PAGE_CACHE_SHIFT; + start = (u64)(index + 1) << PAGE_CACHE_SHIFT; + page = find_get_page(btree_inode->i_mapping, index); + if (!page) + continue; + offset = page_offset(page); + + spin_lock(&dirty_pages->buffer_lock); + eb = radix_tree_lookup( + &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, + offset >> PAGE_CACHE_SHIFT); + spin_unlock(&dirty_pages->buffer_lock); + if (eb) { + ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, + &eb->bflags); + atomic_set(&eb->refs, 1); + } + if (PageWriteback(page)) + end_page_writeback(page); + + lock_page(page); + if (PageDirty(page)) { + clear_page_dirty_for_io(page); + spin_lock_irq(&page->mapping->tree_lock); + radix_tree_tag_clear(&page->mapping->page_tree, + page_index(page), + PAGECACHE_TAG_DIRTY); + spin_unlock_irq(&page->mapping->tree_lock); + } + + page->mapping->a_ops->invalidatepage(page, 0); + unlock_page(page); + } + } + + return ret; +} + +static int btrfs_destroy_pinned_extent(struct btrfs_root *root, + struct extent_io_tree *pinned_extents) +{ + struct extent_io_tree *unpin; + u64 start; + u64 end; + int ret; + + unpin = pinned_extents; + while (1) { + ret = find_first_extent_bit(unpin, 0, &start, &end, + EXTENT_DIRTY); + if (ret) + break; + + /* opt_discard */ + ret = btrfs_error_discard_extent(root, start, end + 1 - start); + + clear_extent_dirty(unpin, start, end, GFP_NOFS); + btrfs_error_unpin_extent_range(root, start, end); + cond_resched(); + } + + return 0; +} + +static int btrfs_cleanup_transaction(struct btrfs_root *root) +{ + struct btrfs_transaction *t; + LIST_HEAD(list); + + WARN_ON(1); + + mutex_lock(&root->fs_info->trans_mutex); + mutex_lock(&root->fs_info->transaction_kthread_mutex); + + list_splice_init(&root->fs_info->trans_list, &list); + while (!list_empty(&list)) { + t = list_entry(list.next, struct btrfs_transaction, list); + if (!t) + break; + + btrfs_destroy_ordered_operations(root); + + btrfs_destroy_ordered_extents(root); + + btrfs_destroy_delayed_refs(t, root); + + btrfs_block_rsv_release(root, + &root->fs_info->trans_block_rsv, + t->dirty_pages.dirty_bytes); + + /* FIXME: cleanup wait for commit */ + t->in_commit = 1; + t->blocked = 1; + if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) + wake_up(&root->fs_info->transaction_blocked_wait); + + t->blocked = 0; + if (waitqueue_active(&root->fs_info->transaction_wait)) + wake_up(&root->fs_info->transaction_wait); + mutex_unlock(&root->fs_info->trans_mutex); + + mutex_lock(&root->fs_info->trans_mutex); + t->commit_done = 1; + if (waitqueue_active(&t->commit_wait)) + wake_up(&t->commit_wait); + mutex_unlock(&root->fs_info->trans_mutex); + + mutex_lock(&root->fs_info->trans_mutex); + + btrfs_destroy_pending_snapshots(t); + + btrfs_destroy_delalloc_inodes(root); + + spin_lock(&root->fs_info->new_trans_lock); + root->fs_info->running_transaction = NULL; + spin_unlock(&root->fs_info->new_trans_lock); + + btrfs_destroy_marked_extents(root, &t->dirty_pages, + EXTENT_DIRTY); + + btrfs_destroy_pinned_extent(root, + root->fs_info->pinned_extents); + + t->use_count = 0; + list_del_init(&t->list); + memset(t, 0, sizeof(*t)); + kmem_cache_free(btrfs_transaction_cachep, t); + } + + mutex_unlock(&root->fs_info->transaction_kthread_mutex); + mutex_unlock(&root->fs_info->trans_mutex); + + return 0; +} + static struct extent_io_ops btree_extent_io_ops = { .write_cache_pages_lock_hook = btree_lock_page_hook, .readpage_end_io_hook = btree_readpage_end_io_hook, -- cgit v1.2.3 From 83a4d54840c88a4a45c49670f044b8c7ddeaa8c7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 27 Dec 2010 16:19:53 +0800 Subject: Btrfs: Fix memory leak at umount fs_info, which is allocated in open_ctree(), should be freed in close_ctree(). Signed-off-by: Li Zefan --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a5d2249e6da5..089871e5cd5a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2513,6 +2513,8 @@ int close_ctree(struct btrfs_root *root) kfree(fs_info->chunk_root); kfree(fs_info->dev_root); kfree(fs_info->csum_root); + kfree(fs_info); + return 0; } -- cgit v1.2.3 From 3612b49598c303cfb22a4b609427f829828e2427 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Tue, 25 Jan 2011 02:51:38 +0000 Subject: btrfs: fix return value check of btrfs_join_transaction() The error check of btrfs_join_transaction()/btrfs_join_transaction_nolock() is added, and the mistake of the error check in several places is corrected. For more stable Btrfs, I think that we should reduce BUG_ON(). But, I think that long time is necessary for this. So, I propose this patch as a short-term solution. With this patch: - To more stable Btrfs, the part that should be corrected is clarified. - The panic isn't done by the NULL pointer reference etc. (even if BUG_ON() is increased temporarily) - The error code is returned in the place where the error can be easily returned. As a long-term plan: - BUG_ON() is reduced by using the forced-readonly framework, etc. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2887b8be6fdd..b36eeef19194 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1550,6 +1550,7 @@ static int transaction_kthread(void *arg) spin_unlock(&root->fs_info->new_trans_lock); trans = btrfs_join_transaction(root, 1); + BUG_ON(IS_ERR(trans)); if (transid == trans->transid) { ret = btrfs_commit_transaction(trans, root); BUG_ON(ret); @@ -2464,10 +2465,14 @@ int btrfs_commit_super(struct btrfs_root *root) up_write(&root->fs_info->cleanup_work_sem); trans = btrfs_join_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); ret = btrfs_commit_transaction(trans, root); BUG_ON(ret); /* run commit again to drop the original snapshot */ trans = btrfs_join_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); btrfs_commit_transaction(trans, root); ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); -- cgit v1.2.3 From eb14ab8ed24a0405fd056068b28c33a1cd846024 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 10 Feb 2011 12:35:00 -0500 Subject: Btrfs: fix page->private races There is a race where btrfs_releasepage can drop the page->private contents just as alloc_extent_buffer is setting up pages for metadata. Because of how the Btrfs page flags work, this results in us skipping the crc on the page during IO. This patch sovles the race by waiting until after the extent buffer is inserted into the radix tree before it sets page private. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b36eeef19194..3e1ea3e0477e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -359,10 +359,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) tree = &BTRFS_I(page->mapping->host)->io_tree; - if (page->private == EXTENT_PAGE_PRIVATE) + if (page->private == EXTENT_PAGE_PRIVATE) { + WARN_ON(1); goto out; - if (!page->private) + } + if (!page->private) { + WARN_ON(1); goto out; + } len = page->private >> 2; WARN_ON(len == 0); -- cgit v1.2.3