From 34d52cb6c50b5a43901709998f59fb1c5a43dc4a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 29 Mar 2011 13:46:06 +0800 Subject: Btrfs: Make free space cache code generic So we can re-use the code to cache free inode numbers. The change is quite straightforward. Two new structures are introduced. - struct btrfs_free_space_ctl We move those variables that are used for caching free space from struct btrfs_block_group_cache to this new struct. - struct btrfs_free_space_op We do block group specific work (e.g. calculation of extents threshold) through functions registered in this struct. And then we can remove references to struct btrfs_block_group_cache. Signed-off-by: Li Zefan --- fs/btrfs/extent-tree.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 31f33ba56fe8..904eae10ec65 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -105,6 +105,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache) WARN_ON(cache->pinned > 0); WARN_ON(cache->reserved > 0); WARN_ON(cache->reserved_pinned > 0); + kfree(cache->free_space_ctl); kfree(cache); } } @@ -4893,7 +4894,7 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, return 0; wait_event(caching_ctl->wait, block_group_cache_done(cache) || - (cache->free_space >= num_bytes)); + (cache->free_space_ctl->free_space >= num_bytes)); put_caching_control(caching_ctl); return 0; @@ -8551,10 +8552,16 @@ int btrfs_read_block_groups(struct btrfs_root *root) ret = -ENOMEM; goto error; } + cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), + GFP_NOFS); + if (!cache->free_space_ctl) { + kfree(cache); + ret = -ENOMEM; + goto error; + } atomic_set(&cache->count, 1); spin_lock_init(&cache->lock); - spin_lock_init(&cache->tree_lock); cache->fs_info = info; INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); @@ -8562,14 +8569,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) if (need_clear) cache->disk_cache_state = BTRFS_DC_CLEAR; - /* - * we only want to have 32k of ram per block group for keeping - * track of free space, and if we pass 1/2 of that we want to - * start converting things over to using bitmaps - */ - cache->extents_thresh = ((1024 * 32) / 2) / - sizeof(struct btrfs_free_space); - read_extent_buffer(leaf, &cache->item, btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(cache->item)); @@ -8580,6 +8579,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) cache->flags = btrfs_block_group_flags(&cache->item); cache->sectorsize = root->sectorsize; + btrfs_init_free_space_ctl(cache); + /* * We need to exclude the super stripes now so that the space * info has super bytes accounted for, otherwise we'll think @@ -8666,6 +8667,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, cache = kzalloc(sizeof(*cache), GFP_NOFS); if (!cache) return -ENOMEM; + cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), + GFP_NOFS); + if (!cache->free_space_ctl) { + kfree(cache); + return -ENOMEM; + } cache->key.objectid = chunk_offset; cache->key.offset = size; @@ -8673,19 +8680,13 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, cache->sectorsize = root->sectorsize; cache->fs_info = root->fs_info; - /* - * we only want to have 32k of ram per block group for keeping track - * of free space, and if we pass 1/2 of that we want to start - * converting things over to using bitmaps - */ - cache->extents_thresh = ((1024 * 32) / 2) / - sizeof(struct btrfs_free_space); atomic_set(&cache->count, 1); spin_lock_init(&cache->lock); - spin_lock_init(&cache->tree_lock); INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); + btrfs_init_free_space_ctl(cache); + btrfs_set_block_group_used(&cache->item, bytes_used); btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); cache->flags = type; -- cgit v1.2.3 From 33345d01522f8152f99dc84a3e7a1a45707f387f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 20 Apr 2011 10:31:50 +0800 Subject: Btrfs: Always use 64bit inode number There's a potential problem in 32bit system when we exhaust 32bit inode numbers and start to allocate big inode numbers, because btrfs uses inode->i_ino in many places. So here we always use BTRFS_I(inode)->location.objectid, which is an u64 variable. There are 2 exceptions that BTRFS_I(inode)->location.objectid != inode->i_ino: the btree inode (0 vs 1) and empty subvol dirs (256 vs 2), and inode->i_ino will be used in those cases. Another reason to make this change is I'm going to use a special inode to save free ino cache, and the inode number must be > (u64)-256. Signed-off-by: Li Zefan --- fs/btrfs/extent-tree.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 904eae10ec65..a0e818cb0401 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7009,8 +7009,8 @@ static noinline int get_new_locations(struct inode *reloc_inode, cur_pos = extent_key->objectid - offset; last_byte = extent_key->objectid + extent_key->offset; - ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, - cur_pos, 0); + ret = btrfs_lookup_file_extent(NULL, root, path, + btrfs_ino(reloc_inode), cur_pos, 0); if (ret < 0) goto out; if (ret > 0) { @@ -7033,7 +7033,7 @@ static noinline int get_new_locations(struct inode *reloc_inode, btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.offset != cur_pos || found_key.type != BTRFS_EXTENT_DATA_KEY || - found_key.objectid != reloc_inode->i_ino) + found_key.objectid != btrfs_ino(reloc_inode)) break; fi = btrfs_item_ptr(leaf, path->slots[0], @@ -7179,7 +7179,7 @@ next: break; } - if (inode && key.objectid != inode->i_ino) { + if (inode && key.objectid != btrfs_ino(inode)) { BUG_ON(extent_locked); btrfs_release_path(root, path); mutex_unlock(&inode->i_mutex); @@ -7488,7 +7488,7 @@ static noinline int invalidate_extent_cache(struct btrfs_root *root, continue; if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) continue; - if (!inode || inode->i_ino != key.objectid) { + if (!inode || btrfs_ino(inode) != key.objectid) { iput(inode); inode = btrfs_ilookup(target_root->fs_info->sb, key.objectid, target_root, 1); -- cgit v1.2.3 From 82d5902d9c681be37ffa9d70482907f9f0b7ec1f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 20 Apr 2011 10:33:24 +0800 Subject: Btrfs: Support reading/writing on disk free ino cache This is similar to block group caching. We dedicate a special inode in fs tree to save free ino cache. At the very first time we create/delete a file after mount, the free ino cache will be loaded from disk into memory. When the fs tree is commited, the cache will be written back to disk. To keep compatibility, we check the root generation against the generation of the special inode when loading the cache, so the loading will fail if the btrfs filesystem was mounted in an older kernel before. Signed-off-by: Li Zefan --- fs/btrfs/extent-tree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a0e818cb0401..95ce8da63b28 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3145,7 +3145,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) /* make sure bytes are sectorsize aligned */ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); - if (root == root->fs_info->tree_root) { + if (root == root->fs_info->tree_root || + BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) { alloc_chunk = 0; committed = 1; } -- cgit v1.2.3 From 62a45b60923a576170a1a0c309c240d9f40d193d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Apr 2011 15:52:26 +0200 Subject: btrfs: make functions static when possible Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cd52f7f556ef..7cdce82e03e7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -94,7 +94,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) return (cache->flags & bits) == bits; } -void btrfs_get_block_group(struct btrfs_block_group_cache *cache) +static void btrfs_get_block_group(struct btrfs_block_group_cache *cache) { atomic_inc(&cache->count); } @@ -3651,8 +3651,8 @@ static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, spin_unlock(&block_rsv->lock); } -void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, - struct btrfs_block_rsv *dest, u64 num_bytes) +static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, + struct btrfs_block_rsv *dest, u64 num_bytes) { struct btrfs_space_info *space_info = block_rsv->space_info; -- cgit v1.2.3 From 172ddd60a662c4d8bf2809462866ddddd6431ea5 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 21 Apr 2011 00:48:27 +0200 Subject: btrfs: drop gfp parameter from alloc_extent_map pass GFP_NOFS directly to kmem_cache_alloc Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7cdce82e03e7..6a3d53783d55 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6694,7 +6694,7 @@ static noinline int relocate_data_extent(struct inode *reloc_inode, u64 start = extent_key->objectid - offset; u64 end = start + extent_key->offset - 1; - em = alloc_extent_map(GFP_NOFS); + em = alloc_extent_map(); BUG_ON(!em); em->start = start; -- cgit v1.2.3 From b3b4aa74b58bded927f579fff787fb6fa1c0393c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 21 Apr 2011 01:20:15 +0200 Subject: btrfs: drop unused parameter from btrfs_release_path parameter tree root it's not used since commit 5f39d397dfbe140a14edecd4e73c34ce23c4f9ee ("Btrfs: Create extent_buffer interface for large blocksizes") Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6a3d53783d55..a160f11465f8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -379,7 +379,7 @@ again: break; caching_ctl->progress = last; - btrfs_release_path(extent_root, path); + btrfs_release_path(path); up_read(&fs_info->extent_commit_sem); mutex_unlock(&caching_ctl->mutex); if (btrfs_transaction_in_commit(fs_info)) @@ -754,7 +754,7 @@ again: atomic_inc(&head->node.refs); spin_unlock(&delayed_refs->lock); - btrfs_release_path(root->fs_info->extent_root, path); + btrfs_release_path(path); mutex_lock(&head->mutex); mutex_unlock(&head->mutex); @@ -934,7 +934,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans, break; } } - btrfs_release_path(root, path); + btrfs_release_path(path); if (owner < BTRFS_FIRST_FREE_OBJECTID) new_size += sizeof(*bi); @@ -1042,7 +1042,7 @@ again: return 0; #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 key.type = BTRFS_EXTENT_REF_V0_KEY; - btrfs_release_path(root, path); + btrfs_release_path(path); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { err = ret; @@ -1080,7 +1080,7 @@ again: if (match_extent_data_ref(leaf, ref, root_objectid, owner, offset)) { if (recow) { - btrfs_release_path(root, path); + btrfs_release_path(path); goto again; } err = 0; @@ -1141,7 +1141,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, if (match_extent_data_ref(leaf, ref, root_objectid, owner, offset)) break; - btrfs_release_path(root, path); + btrfs_release_path(path); key.offset++; ret = btrfs_insert_empty_item(trans, root, path, &key, size); @@ -1167,7 +1167,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); ret = 0; fail: - btrfs_release_path(root, path); + btrfs_release_path(path); return ret; } @@ -1293,7 +1293,7 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, ret = -ENOENT; #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 if (ret == -ENOENT && parent) { - btrfs_release_path(root, path); + btrfs_release_path(path); key.type = BTRFS_EXTENT_REF_V0_KEY; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0) @@ -1322,7 +1322,7 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, } ret = btrfs_insert_empty_item(trans, root, path, &key, 0); - btrfs_release_path(root, path); + btrfs_release_path(path); return ret; } @@ -1608,7 +1608,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans, if (ret != -ENOENT) return ret; - btrfs_release_path(root, path); + btrfs_release_path(path); *ref_ret = NULL; if (owner < BTRFS_FIRST_FREE_OBJECTID) { @@ -1862,7 +1862,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, __run_delayed_extent_op(extent_op, leaf, item); btrfs_mark_buffer_dirty(leaf); - btrfs_release_path(root->fs_info->extent_root, path); + btrfs_release_path(path); path->reada = 1; path->leave_spinning = 1; @@ -2361,7 +2361,7 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, atomic_inc(&head->node.refs); spin_unlock(&delayed_refs->lock); - btrfs_release_path(root->fs_info->extent_root, path); + btrfs_release_path(path); mutex_lock(&head->mutex); mutex_unlock(&head->mutex); @@ -2732,7 +2732,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, bi = btrfs_item_ptr_offset(leaf, path->slots[0]); write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); btrfs_mark_buffer_dirty(leaf); - btrfs_release_path(extent_root, path); + btrfs_release_path(path); fail: if (ret) return ret; @@ -2785,7 +2785,7 @@ again: inode = lookup_free_space_inode(root, block_group, path); if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { ret = PTR_ERR(inode); - btrfs_release_path(root, path); + btrfs_release_path(path); goto out; } @@ -2854,7 +2854,7 @@ again: out_put: iput(inode); out_free: - btrfs_release_path(root, path); + btrfs_release_path(path); out: spin_lock(&block_group->lock); block_group->disk_cache_state = dcs; @@ -4541,7 +4541,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, NULL, refs_to_drop, is_data); BUG_ON(ret); - btrfs_release_path(extent_root, path); + btrfs_release_path(path); path->leave_spinning = 1; key.objectid = bytenr; @@ -4580,7 +4580,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, owner_objectid, 0); BUG_ON(ret < 0); - btrfs_release_path(extent_root, path); + btrfs_release_path(path); path->leave_spinning = 1; key.objectid = bytenr; @@ -4650,7 +4650,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ret = btrfs_del_items(trans, extent_root, path, path->slots[0], num_to_del); BUG_ON(ret); - btrfs_release_path(extent_root, path); + btrfs_release_path(path); if (is_data) { ret = btrfs_del_csums(trans, root, bytenr, num_bytes); @@ -6480,7 +6480,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, trans->block_rsv = block_rsv; } } - btrfs_release_path(root, path); + btrfs_release_path(path); BUG_ON(err); ret = btrfs_del_root(trans, tree_root, &root->root_key); @@ -8580,7 +8580,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) memcpy(&cache->key, &found_key, sizeof(found_key)); key.objectid = found_key.objectid + found_key.offset; - btrfs_release_path(root, path); + btrfs_release_path(path); cache->flags = btrfs_block_group_flags(&cache->item); cache->sectorsize = root->sectorsize; @@ -8802,12 +8802,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, if (ret < 0) goto out; if (ret > 0) - btrfs_release_path(tree_root, path); + btrfs_release_path(path); if (ret == 0) { ret = btrfs_del_item(trans, tree_root, path); if (ret) goto out; - btrfs_release_path(tree_root, path); + btrfs_release_path(path); } spin_lock(&root->fs_info->block_group_cache_lock); -- cgit v1.2.3 From 8cc33e5c19bf01c7617608669be8c1b4f663eb2f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 2 May 2011 15:29:25 +0200 Subject: btrfs: Document a mutex lock/unlock sequence --- fs/btrfs/extent-tree.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a160f11465f8..fba1348cb2a0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -756,6 +756,10 @@ again: btrfs_release_path(path); + /* + * Mutex was contended, block until it's released and try + * again + */ mutex_lock(&head->mutex); mutex_unlock(&head->mutex); btrfs_put_delayed_ref(&head->node); @@ -2297,6 +2301,10 @@ again: atomic_inc(&ref->refs); spin_unlock(&delayed_refs->lock); + /* + * Mutex was contended, block until it's + * released and try again + */ mutex_lock(&head->mutex); mutex_unlock(&head->mutex); @@ -2363,6 +2371,10 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, btrfs_release_path(path); + /* + * Mutex was contended, block until it's released and let + * caller try again + */ mutex_lock(&head->mutex); mutex_unlock(&head->mutex); btrfs_put_delayed_ref(&head->node); -- cgit v1.2.3 From 182608c8294b5fe90d7bbd4b026c82bf0a24b736 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 5 May 2011 13:13:16 +0200 Subject: btrfs: remove old unused commented out code Remove code which has been #if0-ed out for a very long time and does not seem to be related to current codebase anymore. Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 1661 +----------------------------------------------- 1 file changed, 1 insertion(+), 1660 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fba1348cb2a0..b457f195636e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2522,126 +2522,6 @@ out: return ret; } -#if 0 -int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf, u32 nr_extents) -{ - struct btrfs_key key; - struct btrfs_file_extent_item *fi; - u64 root_gen; - u32 nritems; - int i; - int level; - int ret = 0; - int shared = 0; - - if (!root->ref_cows) - return 0; - - if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { - shared = 0; - root_gen = root->root_key.offset; - } else { - shared = 1; - root_gen = trans->transid - 1; - } - - level = btrfs_header_level(buf); - nritems = btrfs_header_nritems(buf); - - if (level == 0) { - struct btrfs_leaf_ref *ref; - struct btrfs_extent_info *info; - - ref = btrfs_alloc_leaf_ref(root, nr_extents); - if (!ref) { - ret = -ENOMEM; - goto out; - } - - ref->root_gen = root_gen; - ref->bytenr = buf->start; - ref->owner = btrfs_header_owner(buf); - ref->generation = btrfs_header_generation(buf); - ref->nritems = nr_extents; - info = ref->extents; - - for (i = 0; nr_extents > 0 && i < nritems; i++) { - u64 disk_bytenr; - btrfs_item_key_to_cpu(buf, &key, i); - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) - continue; - fi = btrfs_item_ptr(buf, i, - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(buf, fi) == - BTRFS_FILE_EXTENT_INLINE) - continue; - disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); - if (disk_bytenr == 0) - continue; - - info->bytenr = disk_bytenr; - info->num_bytes = - btrfs_file_extent_disk_num_bytes(buf, fi); - info->objectid = key.objectid; - info->offset = key.offset; - info++; - } - - ret = btrfs_add_leaf_ref(root, ref, shared); - if (ret == -EEXIST && shared) { - struct btrfs_leaf_ref *old; - old = btrfs_lookup_leaf_ref(root, ref->bytenr); - BUG_ON(!old); - btrfs_remove_leaf_ref(root, old); - btrfs_free_leaf_ref(root, old); - ret = btrfs_add_leaf_ref(root, ref, shared); - } - WARN_ON(ret); - btrfs_free_leaf_ref(root, ref); - } -out: - return ret; -} - -/* when a block goes through cow, we update the reference counts of - * everything that block points to. The internal pointers of the block - * can be in just about any order, and it is likely to have clusters of - * things that are close together and clusters of things that are not. - * - * To help reduce the seeks that come with updating all of these reference - * counts, sort them by byte number before actual updates are done. - * - * struct refsort is used to match byte number to slot in the btree block. - * we sort based on the byte number and then use the slot to actually - * find the item. - * - * struct refsort is smaller than strcut btrfs_item and smaller than - * struct btrfs_key_ptr. Since we're currently limited to the page size - * for a btree block, there's no way for a kmalloc of refsorts for a - * single node to be bigger than a page. - */ -struct refsort { - u64 bytenr; - u32 slot; -}; - -/* - * for passing into sort() - */ -static int refsort_cmp(const void *a_void, const void *b_void) -{ - const struct refsort *a = a_void; - const struct refsort *b = b_void; - - if (a->bytenr < b->bytenr) - return -1; - if (a->bytenr > b->bytenr) - return 1; - return 0; -} -#endif - static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, @@ -3223,18 +3103,6 @@ commit_trans: goto again; } -#if 0 /* I hope we never need this code again, just in case */ - printk(KERN_ERR "no space left, need %llu, %llu bytes_used, " - "%llu bytes_reserved, " "%llu bytes_pinned, " - "%llu bytes_readonly, %llu may use %llu total\n", - (unsigned long long)bytes, - (unsigned long long)data_sinfo->bytes_used, - (unsigned long long)data_sinfo->bytes_reserved, - (unsigned long long)data_sinfo->bytes_pinned, - (unsigned long long)data_sinfo->bytes_readonly, - (unsigned long long)data_sinfo->bytes_may_use, - (unsigned long long)data_sinfo->total_bytes); -#endif return -ENOSPC; } data_sinfo->bytes_may_use += bytes; @@ -3867,23 +3735,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) u64 meta_used; u64 data_used; int csum_size = btrfs_super_csum_size(&fs_info->super_copy); -#if 0 - /* - * per tree used space accounting can be inaccuracy, so we - * can't rely on it. - */ - spin_lock(&fs_info->extent_root->accounting_lock); - num_bytes = btrfs_root_used(&fs_info->extent_root->root_item); - spin_unlock(&fs_info->extent_root->accounting_lock); - spin_lock(&fs_info->csum_root->accounting_lock); - num_bytes += btrfs_root_used(&fs_info->csum_root->root_item); - spin_unlock(&fs_info->csum_root->accounting_lock); - - spin_lock(&fs_info->tree_root->accounting_lock); - num_bytes += btrfs_root_used(&fs_info->tree_root->root_item); - spin_unlock(&fs_info->tree_root->accounting_lock); -#endif sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); spin_lock(&sinfo->lock); data_used = sinfo->bytes_used; @@ -3936,10 +3788,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) block_rsv->reserved = block_rsv->size; block_rsv->full = 1; } -#if 0 - printk(KERN_INFO"global block rsv size %llu reserved %llu\n", - block_rsv->size, block_rsv->reserved); -#endif + spin_unlock(&sinfo->lock); spin_unlock(&block_rsv->lock); } @@ -6596,1514 +6445,6 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, return ret; } -#if 0 -static unsigned long calc_ra(unsigned long start, unsigned long last, - unsigned long nr) -{ - return min(last, start + nr - 1); -} - -static noinline int relocate_inode_pages(struct inode *inode, u64 start, - u64 len) -{ - u64 page_start; - u64 page_end; - unsigned long first_index; - unsigned long last_index; - unsigned long i; - struct page *page; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct file_ra_state *ra; - struct btrfs_ordered_extent *ordered; - unsigned int total_read = 0; - unsigned int total_dirty = 0; - int ret = 0; - - ra = kzalloc(sizeof(*ra), GFP_NOFS); - if (!ra) - return -ENOMEM; - - mutex_lock(&inode->i_mutex); - first_index = start >> PAGE_CACHE_SHIFT; - last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; - - /* make sure the dirty trick played by the caller work */ - ret = invalidate_inode_pages2_range(inode->i_mapping, - first_index, last_index); - if (ret) - goto out_unlock; - - file_ra_state_init(ra, inode->i_mapping); - - for (i = first_index ; i <= last_index; i++) { - if (total_read % ra->ra_pages == 0) { - btrfs_force_ra(inode->i_mapping, ra, NULL, i, - calc_ra(i, last_index, ra->ra_pages)); - } - total_read++; -again: - if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) - BUG_ON(1); - page = grab_cache_page(inode->i_mapping, i); - if (!page) { - ret = -ENOMEM; - goto out_unlock; - } - if (!PageUptodate(page)) { - btrfs_readpage(NULL, page); - lock_page(page); - if (!PageUptodate(page)) { - unlock_page(page); - page_cache_release(page); - ret = -EIO; - goto out_unlock; - } - } - wait_on_page_writeback(page); - - page_start = (u64)page->index << PAGE_CACHE_SHIFT; - page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(io_tree, page_start, page_end, GFP_NOFS); - - ordered = btrfs_lookup_ordered_extent(inode, page_start); - if (ordered) { - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); - unlock_page(page); - page_cache_release(page); - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); - goto again; - } - set_page_extent_mapped(page); - - if (i == first_index) - set_extent_bits(io_tree, page_start, page_end, - EXTENT_BOUNDARY, GFP_NOFS); - btrfs_set_extent_delalloc(inode, page_start, page_end); - - set_page_dirty(page); - total_dirty++; - - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); - unlock_page(page); - page_cache_release(page); - } - -out_unlock: - kfree(ra); - mutex_unlock(&inode->i_mutex); - balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); - return ret; -} - -static noinline int relocate_data_extent(struct inode *reloc_inode, - struct btrfs_key *extent_key, - u64 offset) -{ - struct btrfs_root *root = BTRFS_I(reloc_inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; - struct extent_map *em; - u64 start = extent_key->objectid - offset; - u64 end = start + extent_key->offset - 1; - - em = alloc_extent_map(); - BUG_ON(!em); - - em->start = start; - em->len = extent_key->offset; - em->block_len = extent_key->offset; - em->block_start = extent_key->objectid; - em->bdev = root->fs_info->fs_devices->latest_bdev; - set_bit(EXTENT_FLAG_PINNED, &em->flags); - - /* setup extent map to cheat btrfs_readpage */ - lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); - while (1) { - int ret; - write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); - write_unlock(&em_tree->lock); - if (ret != -EEXIST) { - free_extent_map(em); - break; - } - btrfs_drop_extent_cache(reloc_inode, start, end, 0); - } - unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); - - return relocate_inode_pages(reloc_inode, start, extent_key->offset); -} - -struct btrfs_ref_path { - u64 extent_start; - u64 nodes[BTRFS_MAX_LEVEL]; - u64 root_objectid; - u64 root_generation; - u64 owner_objectid; - u32 num_refs; - int lowest_level; - int current_level; - int shared_level; - - struct btrfs_key node_keys[BTRFS_MAX_LEVEL]; - u64 new_nodes[BTRFS_MAX_LEVEL]; -}; - -struct disk_extent { - u64 ram_bytes; - u64 disk_bytenr; - u64 disk_num_bytes; - u64 offset; - u64 num_bytes; - u8 compression; - u8 encryption; - u16 other_encoding; -}; - -static int is_cowonly_root(u64 root_objectid) -{ - if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || - root_objectid == BTRFS_EXTENT_TREE_OBJECTID || - root_objectid == BTRFS_CHUNK_TREE_OBJECTID || - root_objectid == BTRFS_DEV_TREE_OBJECTID || - root_objectid == BTRFS_TREE_LOG_OBJECTID || - root_objectid == BTRFS_CSUM_TREE_OBJECTID) - return 1; - return 0; -} - -static noinline int __next_ref_path(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, - struct btrfs_ref_path *ref_path, - int first_time) -{ - struct extent_buffer *leaf; - struct btrfs_path *path; - struct btrfs_extent_ref *ref; - struct btrfs_key key; - struct btrfs_key found_key; - u64 bytenr; - u32 nritems; - int level; - int ret = 1; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - if (first_time) { - ref_path->lowest_level = -1; - ref_path->current_level = -1; - ref_path->shared_level = -1; - goto walk_up; - } -walk_down: - level = ref_path->current_level - 1; - while (level >= -1) { - u64 parent; - if (level < ref_path->lowest_level) - break; - - if (level >= 0) - bytenr = ref_path->nodes[level]; - else - bytenr = ref_path->extent_start; - BUG_ON(bytenr == 0); - - parent = ref_path->nodes[level + 1]; - ref_path->nodes[level + 1] = 0; - ref_path->current_level = level; - BUG_ON(parent == 0); - - key.objectid = bytenr; - key.offset = parent + 1; - key.type = BTRFS_EXTENT_REF_KEY; - - ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); - if (ret < 0) - goto out; - BUG_ON(ret == 0); - - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); - if (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(extent_root, path); - if (ret < 0) - goto out; - if (ret > 0) - goto next; - leaf = path->nodes[0]; - } - - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid == bytenr && - found_key.type == BTRFS_EXTENT_REF_KEY) { - if (level < ref_path->shared_level) - ref_path->shared_level = level; - goto found; - } -next: - level--; - btrfs_release_path(extent_root, path); - cond_resched(); - } - /* reached lowest level */ - ret = 1; - goto out; -walk_up: - level = ref_path->current_level; - while (level < BTRFS_MAX_LEVEL - 1) { - u64 ref_objectid; - - if (level >= 0) - bytenr = ref_path->nodes[level]; - else - bytenr = ref_path->extent_start; - - BUG_ON(bytenr == 0); - - key.objectid = bytenr; - key.offset = 0; - key.type = BTRFS_EXTENT_REF_KEY; - - ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); - if (ret < 0) - goto out; - - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); - if (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(extent_root, path); - if (ret < 0) - goto out; - if (ret > 0) { - /* the extent was freed by someone */ - if (ref_path->lowest_level == level) - goto out; - btrfs_release_path(extent_root, path); - goto walk_down; - } - leaf = path->nodes[0]; - } - - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid != bytenr || - found_key.type != BTRFS_EXTENT_REF_KEY) { - /* the extent was freed by someone */ - if (ref_path->lowest_level == level) { - ret = 1; - goto out; - } - btrfs_release_path(extent_root, path); - goto walk_down; - } -found: - ref = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_extent_ref); - ref_objectid = btrfs_ref_objectid(leaf, ref); - if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) { - if (first_time) { - level = (int)ref_objectid; - BUG_ON(level >= BTRFS_MAX_LEVEL); - ref_path->lowest_level = level; - ref_path->current_level = level; - ref_path->nodes[level] = bytenr; - } else { - WARN_ON(ref_objectid != level); - } - } else { - WARN_ON(level != -1); - } - first_time = 0; - - if (ref_path->lowest_level == level) { - ref_path->owner_objectid = ref_objectid; - ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); - } - - /* - * the block is tree root or the block isn't in reference - * counted tree. - */ - if (found_key.objectid == found_key.offset || - is_cowonly_root(btrfs_ref_root(leaf, ref))) { - ref_path->root_objectid = btrfs_ref_root(leaf, ref); - ref_path->root_generation = - btrfs_ref_generation(leaf, ref); - if (level < 0) { - /* special reference from the tree log */ - ref_path->nodes[0] = found_key.offset; - ref_path->current_level = 0; - } - ret = 0; - goto out; - } - - level++; - BUG_ON(ref_path->nodes[level] != 0); - ref_path->nodes[level] = found_key.offset; - ref_path->current_level = level; - - /* - * the reference was created in the running transaction, - * no need to continue walking up. - */ - if (btrfs_ref_generation(leaf, ref) == trans->transid) { - ref_path->root_objectid = btrfs_ref_root(leaf, ref); - ref_path->root_generation = - btrfs_ref_generation(leaf, ref); - ret = 0; - goto out; - } - - btrfs_release_path(extent_root, path); - cond_resched(); - } - /* reached max tree level, but no tree root found. */ - BUG(); -out: - btrfs_free_path(path); - return ret; -} - -static int btrfs_first_ref_path(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, - struct btrfs_ref_path *ref_path, - u64 extent_start) -{ - memset(ref_path, 0, sizeof(*ref_path)); - ref_path->extent_start = extent_start; - - return __next_ref_path(trans, extent_root, ref_path, 1); -} - -static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, - struct btrfs_ref_path *ref_path) -{ - return __next_ref_path(trans, extent_root, ref_path, 0); -} - -static noinline int get_new_locations(struct inode *reloc_inode, - struct btrfs_key *extent_key, - u64 offset, int no_fragment, - struct disk_extent **extents, - int *nr_extents) -{ - struct btrfs_root *root = BTRFS_I(reloc_inode)->root; - struct btrfs_path *path; - struct btrfs_file_extent_item *fi; - struct extent_buffer *leaf; - struct disk_extent *exts = *extents; - struct btrfs_key found_key; - u64 cur_pos; - u64 last_byte; - u32 nritems; - int nr = 0; - int max = *nr_extents; - int ret; - - WARN_ON(!no_fragment && *extents); - if (!exts) { - max = 1; - exts = kmalloc(sizeof(*exts) * max, GFP_NOFS); - if (!exts) - return -ENOMEM; - } - - path = btrfs_alloc_path(); - if (!path) { - if (exts != *extents) - kfree(exts); - return -ENOMEM; - } - - cur_pos = extent_key->objectid - offset; - last_byte = extent_key->objectid + extent_key->offset; - ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, - cur_pos, 0); - if (ret < 0) - goto out; - if (ret > 0) { - ret = -ENOENT; - goto out; - } - - while (1) { - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); - if (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) - goto out; - if (ret > 0) - break; - leaf = path->nodes[0]; - } - - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.offset != cur_pos || - found_key.type != BTRFS_EXTENT_DATA_KEY || - found_key.objectid != reloc_inode->i_ino) - break; - - fi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, fi) != - BTRFS_FILE_EXTENT_REG || - btrfs_file_extent_disk_bytenr(leaf, fi) == 0) - break; - - if (nr == max) { - struct disk_extent *old = exts; - max *= 2; - exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); - if (!exts) { - ret = -ENOMEM; - goto out; - } - memcpy(exts, old, sizeof(*exts) * nr); - if (old != *extents) - kfree(old); - } - - exts[nr].disk_bytenr = - btrfs_file_extent_disk_bytenr(leaf, fi); - exts[nr].disk_num_bytes = - btrfs_file_extent_disk_num_bytes(leaf, fi); - exts[nr].offset = btrfs_file_extent_offset(leaf, fi); - exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); - exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); - exts[nr].compression = btrfs_file_extent_compression(leaf, fi); - exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi); - exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf, - fi); - BUG_ON(exts[nr].offset > 0); - BUG_ON(exts[nr].compression || exts[nr].encryption); - BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); - - cur_pos += exts[nr].num_bytes; - nr++; - - if (cur_pos + offset >= last_byte) - break; - - if (no_fragment) { - ret = 1; - goto out; - } - path->slots[0]++; - } - - BUG_ON(cur_pos + offset > last_byte); - if (cur_pos + offset < last_byte) { - ret = -ENOENT; - goto out; - } - ret = 0; -out: - btrfs_free_path(path); - if (ret) { - if (exts != *extents) - kfree(exts); - } else { - *extents = exts; - *nr_extents = nr; - } - return ret; -} - -static noinline int replace_one_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_key *extent_key, - struct btrfs_key *leaf_key, - struct btrfs_ref_path *ref_path, - struct disk_extent *new_extents, - int nr_extents) -{ - struct extent_buffer *leaf; - struct btrfs_file_extent_item *fi; - struct inode *inode = NULL; - struct btrfs_key key; - u64 lock_start = 0; - u64 lock_end = 0; - u64 num_bytes; - u64 ext_offset; - u64 search_end = (u64)-1; - u32 nritems; - int nr_scaned = 0; - int extent_locked = 0; - int extent_type; - int ret; - - memcpy(&key, leaf_key, sizeof(key)); - if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { - if (key.objectid < ref_path->owner_objectid || - (key.objectid == ref_path->owner_objectid && - key.type < BTRFS_EXTENT_DATA_KEY)) { - key.objectid = ref_path->owner_objectid; - key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = 0; - } - } - - while (1) { - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); - if (ret < 0) - goto out; - - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); -next: - if (extent_locked && ret > 0) { - /* - * the file extent item was modified by someone - * before the extent got locked. - */ - unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, - lock_end, GFP_NOFS); - extent_locked = 0; - } - - if (path->slots[0] >= nritems) { - if (++nr_scaned > 2) - break; - - BUG_ON(extent_locked); - ret = btrfs_next_leaf(root, path); - if (ret < 0) - goto out; - if (ret > 0) - break; - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); - } - - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - - if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { - if ((key.objectid > ref_path->owner_objectid) || - (key.objectid == ref_path->owner_objectid && - key.type > BTRFS_EXTENT_DATA_KEY) || - key.offset >= search_end) - break; - } - - if (inode && key.objectid != inode->i_ino) { - BUG_ON(extent_locked); - btrfs_release_path(root, path); - mutex_unlock(&inode->i_mutex); - iput(inode); - inode = NULL; - continue; - } - - if (key.type != BTRFS_EXTENT_DATA_KEY) { - path->slots[0]++; - ret = 1; - goto next; - } - fi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - extent_type = btrfs_file_extent_type(leaf, fi); - if ((extent_type != BTRFS_FILE_EXTENT_REG && - extent_type != BTRFS_FILE_EXTENT_PREALLOC) || - (btrfs_file_extent_disk_bytenr(leaf, fi) != - extent_key->objectid)) { - path->slots[0]++; - ret = 1; - goto next; - } - - num_bytes = btrfs_file_extent_num_bytes(leaf, fi); - ext_offset = btrfs_file_extent_offset(leaf, fi); - - if (search_end == (u64)-1) { - search_end = key.offset - ext_offset + - btrfs_file_extent_ram_bytes(leaf, fi); - } - - if (!extent_locked) { - lock_start = key.offset; - lock_end = lock_start + num_bytes - 1; - } else { - if (lock_start > key.offset || - lock_end + 1 < key.offset + num_bytes) { - unlock_extent(&BTRFS_I(inode)->io_tree, - lock_start, lock_end, GFP_NOFS); - extent_locked = 0; - } - } - - if (!inode) { - btrfs_release_path(root, path); - - inode = btrfs_iget_locked(root->fs_info->sb, - key.objectid, root); - if (inode->i_state & I_NEW) { - BTRFS_I(inode)->root = root; - BTRFS_I(inode)->location.objectid = - key.objectid; - BTRFS_I(inode)->location.type = - BTRFS_INODE_ITEM_KEY; - BTRFS_I(inode)->location.offset = 0; - btrfs_read_locked_inode(inode); - unlock_new_inode(inode); - } - /* - * some code call btrfs_commit_transaction while - * holding the i_mutex, so we can't use mutex_lock - * here. - */ - if (is_bad_inode(inode) || - !mutex_trylock(&inode->i_mutex)) { - iput(inode); - inode = NULL; - key.offset = (u64)-1; - goto skip; - } - } - - if (!extent_locked) { - struct btrfs_ordered_extent *ordered; - - btrfs_release_path(root, path); - - lock_extent(&BTRFS_I(inode)->io_tree, lock_start, - lock_end, GFP_NOFS); - ordered = btrfs_lookup_first_ordered_extent(inode, - lock_end); - if (ordered && - ordered->file_offset <= lock_end && - ordered->file_offset + ordered->len > lock_start) { - unlock_extent(&BTRFS_I(inode)->io_tree, - lock_start, lock_end, GFP_NOFS); - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); - key.offset += num_bytes; - goto skip; - } - if (ordered) - btrfs_put_ordered_extent(ordered); - - extent_locked = 1; - continue; - } - - if (nr_extents == 1) { - /* update extent pointer in place */ - btrfs_set_file_extent_disk_bytenr(leaf, fi, - new_extents[0].disk_bytenr); - btrfs_set_file_extent_disk_num_bytes(leaf, fi, - new_extents[0].disk_num_bytes); - btrfs_mark_buffer_dirty(leaf); - - btrfs_drop_extent_cache(inode, key.offset, - key.offset + num_bytes - 1, 0); - - ret = btrfs_inc_extent_ref(trans, root, - new_extents[0].disk_bytenr, - new_extents[0].disk_num_bytes, - leaf->start, - root->root_key.objectid, - trans->transid, - key.objectid); - BUG_ON(ret); - - ret = btrfs_free_extent(trans, root, - extent_key->objectid, - extent_key->offset, - leaf->start, - btrfs_header_owner(leaf), - btrfs_header_generation(leaf), - key.objectid, 0); - BUG_ON(ret); - - btrfs_release_path(root, path); - key.offset += num_bytes; - } else { - BUG_ON(1); -#if 0 - u64 alloc_hint; - u64 extent_len; - int i; - /* - * drop old extent pointer at first, then insert the - * new pointers one bye one - */ - btrfs_release_path(root, path); - ret = btrfs_drop_extents(trans, root, inode, key.offset, - key.offset + num_bytes, - key.offset, &alloc_hint); - BUG_ON(ret); - - for (i = 0; i < nr_extents; i++) { - if (ext_offset >= new_extents[i].num_bytes) { - ext_offset -= new_extents[i].num_bytes; - continue; - } - extent_len = min(new_extents[i].num_bytes - - ext_offset, num_bytes); - - ret = btrfs_insert_empty_item(trans, root, - path, &key, - sizeof(*fi)); - BUG_ON(ret); - - leaf = path->nodes[0]; - fi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(leaf, fi, - trans->transid); - btrfs_set_file_extent_type(leaf, fi, - BTRFS_FILE_EXTENT_REG); - btrfs_set_file_extent_disk_bytenr(leaf, fi, - new_extents[i].disk_bytenr); - btrfs_set_file_extent_disk_num_bytes(leaf, fi, - new_extents[i].disk_num_bytes); - btrfs_set_file_extent_ram_bytes(leaf, fi, - new_extents[i].ram_bytes); - - btrfs_set_file_extent_compression(leaf, fi, - new_extents[i].compression); - btrfs_set_file_extent_encryption(leaf, fi, - new_extents[i].encryption); - btrfs_set_file_extent_other_encoding(leaf, fi, - new_extents[i].other_encoding); - - btrfs_set_file_extent_num_bytes(leaf, fi, - extent_len); - ext_offset += new_extents[i].offset; - btrfs_set_file_extent_offset(leaf, fi, - ext_offset); - btrfs_mark_buffer_dirty(leaf); - - btrfs_drop_extent_cache(inode, key.offset, - key.offset + extent_len - 1, 0); - - ret = btrfs_inc_extent_ref(trans, root, - new_extents[i].disk_bytenr, - new_extents[i].disk_num_bytes, - leaf->start, - root->root_key.objectid, - trans->transid, key.objectid); - BUG_ON(ret); - btrfs_release_path(root, path); - - inode_add_bytes(inode, extent_len); - - ext_offset = 0; - num_bytes -= extent_len; - key.offset += extent_len; - - if (num_bytes == 0) - break; - } - BUG_ON(i >= nr_extents); -#endif - } - - if (extent_locked) { - unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, - lock_end, GFP_NOFS); - extent_locked = 0; - } -skip: - if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && - key.offset >= search_end) - break; - - cond_resched(); - } - ret = 0; -out: - btrfs_release_path(root, path); - if (inode) { - mutex_unlock(&inode->i_mutex); - if (extent_locked) { - unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, - lock_end, GFP_NOFS); - } - iput(inode); - } - return ret; -} - -int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_buffer *buf, u64 orig_start) -{ - int level; - int ret; - - BUG_ON(btrfs_header_generation(buf) != trans->transid); - BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); - - level = btrfs_header_level(buf); - if (level == 0) { - struct btrfs_leaf_ref *ref; - struct btrfs_leaf_ref *orig_ref; - - orig_ref = btrfs_lookup_leaf_ref(root, orig_start); - if (!orig_ref) - return -ENOENT; - - ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems); - if (!ref) { - btrfs_free_leaf_ref(root, orig_ref); - return -ENOMEM; - } - - ref->nritems = orig_ref->nritems; - memcpy(ref->extents, orig_ref->extents, - sizeof(ref->extents[0]) * ref->nritems); - - btrfs_free_leaf_ref(root, orig_ref); - - ref->root_gen = trans->transid; - ref->bytenr = buf->start; - ref->owner = btrfs_header_owner(buf); - ref->generation = btrfs_header_generation(buf); - - ret = btrfs_add_leaf_ref(root, ref, 0); - WARN_ON(ret); - btrfs_free_leaf_ref(root, ref); - } - return 0; -} - -static noinline int invalidate_extent_cache(struct btrfs_root *root, - struct extent_buffer *leaf, - struct btrfs_block_group_cache *group, - struct btrfs_root *target_root) -{ - struct btrfs_key key; - struct inode *inode = NULL; - struct btrfs_file_extent_item *fi; - struct extent_state *cached_state = NULL; - u64 num_bytes; - u64 skip_objectid = 0; - u32 nritems; - u32 i; - - nritems = btrfs_header_nritems(leaf); - for (i = 0; i < nritems; i++) { - btrfs_item_key_to_cpu(leaf, &key, i); - if (key.objectid == skip_objectid || - key.type != BTRFS_EXTENT_DATA_KEY) - continue; - fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, fi) == - BTRFS_FILE_EXTENT_INLINE) - continue; - if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) - continue; - if (!inode || inode->i_ino != key.objectid) { - iput(inode); - inode = btrfs_ilookup(target_root->fs_info->sb, - key.objectid, target_root, 1); - } - if (!inode) { - skip_objectid = key.objectid; - continue; - } - num_bytes = btrfs_file_extent_num_bytes(leaf, fi); - - lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset, - key.offset + num_bytes - 1, 0, &cached_state, - GFP_NOFS); - btrfs_drop_extent_cache(inode, key.offset, - key.offset + num_bytes - 1, 1); - unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset, - key.offset + num_bytes - 1, &cached_state, - GFP_NOFS); - cond_resched(); - } - iput(inode); - return 0; -} - -static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_buffer *leaf, - struct btrfs_block_group_cache *group, - struct inode *reloc_inode) -{ - struct btrfs_key key; - struct btrfs_key extent_key; - struct btrfs_file_extent_item *fi; - struct btrfs_leaf_ref *ref; - struct disk_extent *new_extent; - u64 bytenr; - u64 num_bytes; - u32 nritems; - u32 i; - int ext_index; - int nr_extent; - int ret; - - new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); - if (!new_extent) - return -ENOMEM; - - ref = btrfs_lookup_leaf_ref(root, leaf->start); - BUG_ON(!ref); - - ext_index = -1; - nritems = btrfs_header_nritems(leaf); - for (i = 0; i < nritems; i++) { - btrfs_item_key_to_cpu(leaf, &key, i); - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) - continue; - fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, fi) == - BTRFS_FILE_EXTENT_INLINE) - continue; - bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); - num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); - if (bytenr == 0) - continue; - - ext_index++; - if (bytenr >= group->key.objectid + group->key.offset || - bytenr + num_bytes <= group->key.objectid) - continue; - - extent_key.objectid = bytenr; - extent_key.offset = num_bytes; - extent_key.type = BTRFS_EXTENT_ITEM_KEY; - nr_extent = 1; - ret = get_new_locations(reloc_inode, &extent_key, - group->key.objectid, 1, - &new_extent, &nr_extent); - if (ret > 0) - continue; - BUG_ON(ret < 0); - - BUG_ON(ref->extents[ext_index].bytenr != bytenr); - BUG_ON(ref->extents[ext_index].num_bytes != num_bytes); - ref->extents[ext_index].bytenr = new_extent->disk_bytenr; - ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; - - btrfs_set_file_extent_disk_bytenr(leaf, fi, - new_extent->disk_bytenr); - btrfs_set_file_extent_disk_num_bytes(leaf, fi, - new_extent->disk_num_bytes); - btrfs_mark_buffer_dirty(leaf); - - ret = btrfs_inc_extent_ref(trans, root, - new_extent->disk_bytenr, - new_extent->disk_num_bytes, - leaf->start, - root->root_key.objectid, - trans->transid, key.objectid); - BUG_ON(ret); - - ret = btrfs_free_extent(trans, root, - bytenr, num_bytes, leaf->start, - btrfs_header_owner(leaf), - btrfs_header_generation(leaf), - key.objectid, 0); - BUG_ON(ret); - cond_resched(); - } - kfree(new_extent); - BUG_ON(ext_index + 1 != ref->nritems); - btrfs_free_leaf_ref(root, ref); - return 0; -} - -int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - struct btrfs_root *reloc_root; - int ret; - - if (root->reloc_root) { - reloc_root = root->reloc_root; - root->reloc_root = NULL; - list_add(&reloc_root->dead_list, - &root->fs_info->dead_reloc_roots); - - btrfs_set_root_bytenr(&reloc_root->root_item, - reloc_root->node->start); - btrfs_set_root_level(&root->root_item, - btrfs_header_level(reloc_root->node)); - memset(&reloc_root->root_item.drop_progress, 0, - sizeof(struct btrfs_disk_key)); - reloc_root->root_item.drop_level = 0; - - ret = btrfs_update_root(trans, root->fs_info->tree_root, - &reloc_root->root_key, - &reloc_root->root_item); - BUG_ON(ret); - } - return 0; -} - -int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) -{ - struct btrfs_trans_handle *trans; - struct btrfs_root *reloc_root; - struct btrfs_root *prev_root = NULL; - struct list_head dead_roots; - int ret; - unsigned long nr; - - INIT_LIST_HEAD(&dead_roots); - list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots); - - while (!list_empty(&dead_roots)) { - reloc_root = list_entry(dead_roots.prev, - struct btrfs_root, dead_list); - list_del_init(&reloc_root->dead_list); - - BUG_ON(reloc_root->commit_root != NULL); - while (1) { - trans = btrfs_join_transaction(root, 1); - BUG_ON(IS_ERR(trans)); - - mutex_lock(&root->fs_info->drop_mutex); - ret = btrfs_drop_snapshot(trans, reloc_root); - if (ret != -EAGAIN) - break; - mutex_unlock(&root->fs_info->drop_mutex); - - nr = trans->blocks_used; - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - btrfs_btree_balance_dirty(root, nr); - } - - free_extent_buffer(reloc_root->node); - - ret = btrfs_del_root(trans, root->fs_info->tree_root, - &reloc_root->root_key); - BUG_ON(ret); - mutex_unlock(&root->fs_info->drop_mutex); - - nr = trans->blocks_used; - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - btrfs_btree_balance_dirty(root, nr); - - kfree(prev_root); - prev_root = reloc_root; - } - if (prev_root) { - btrfs_remove_leaf_refs(prev_root, (u64)-1, 0); - kfree(prev_root); - } - return 0; -} - -int btrfs_add_dead_reloc_root(struct btrfs_root *root) -{ - list_add(&root->dead_list, &root->fs_info->dead_reloc_roots); - return 0; -} - -int btrfs_cleanup_reloc_trees(struct btrfs_root *root) -{ - struct btrfs_root *reloc_root; - struct btrfs_trans_handle *trans; - struct btrfs_key location; - int found; - int ret; - - mutex_lock(&root->fs_info->tree_reloc_mutex); - ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL); - BUG_ON(ret); - found = !list_empty(&root->fs_info->dead_reloc_roots); - mutex_unlock(&root->fs_info->tree_reloc_mutex); - - if (found) { - trans = btrfs_start_transaction(root, 1); - BUG_ON(IS_ERR(trans)); - ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); - } - - location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; - location.offset = (u64)-1; - location.type = BTRFS_ROOT_ITEM_KEY; - - reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); - BUG_ON(!reloc_root); - ret = btrfs_orphan_cleanup(reloc_root); - BUG_ON(ret); - return 0; -} - -static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - struct btrfs_root *reloc_root; - struct extent_buffer *eb; - struct btrfs_root_item *root_item; - struct btrfs_key root_key; - int ret; - - BUG_ON(!root->ref_cows); - if (root->reloc_root) - return 0; - - root_item = kmalloc(sizeof(*root_item), GFP_NOFS); - if (!root_item) - return -ENOMEM; - - ret = btrfs_copy_root(trans, root, root->commit_root, - &eb, BTRFS_TREE_RELOC_OBJECTID); - BUG_ON(ret); - - root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; - root_key.offset = root->root_key.objectid; - root_key.type = BTRFS_ROOT_ITEM_KEY; - - memcpy(root_item, &root->root_item, sizeof(root_item)); - btrfs_set_root_refs(root_item, 0); - btrfs_set_root_bytenr(root_item, eb->start); - btrfs_set_root_level(root_item, btrfs_header_level(eb)); - btrfs_set_root_generation(root_item, trans->transid); - - btrfs_tree_unlock(eb); - free_extent_buffer(eb); - - ret = btrfs_insert_root(trans, root->fs_info->tree_root, - &root_key, root_item); - BUG_ON(ret); - kfree(root_item); - - reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, - &root_key); - BUG_ON(IS_ERR(reloc_root)); - reloc_root->last_trans = trans->transid; - reloc_root->commit_root = NULL; - reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; - - root->reloc_root = reloc_root; - return 0; -} - -/* - * Core function of space balance. - * - * The idea is using reloc trees to relocate tree blocks in reference - * counted roots. There is one reloc tree for each subvol, and all - * reloc trees share same root key objectid. Reloc trees are snapshots - * of the latest committed roots of subvols (root->commit_root). - * - * To relocate a tree block referenced by a subvol, there are two steps. - * COW the block through subvol's reloc tree, then update block pointer - * in the subvol to point to the new block. Since all reloc trees share - * same root key objectid, doing special handing for tree blocks owned - * by them is easy. Once a tree block has been COWed in one reloc tree, - * we can use the resulting new block directly when the same block is - * required to COW again through other reloc trees. By this way, relocated - * tree blocks are shared between reloc trees, so they are also shared - * between subvols. - */ -static noinline int relocate_one_path(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_key *first_key, - struct btrfs_ref_path *ref_path, - struct btrfs_block_group_cache *group, - struct inode *reloc_inode) -{ - struct btrfs_root *reloc_root; - struct extent_buffer *eb = NULL; - struct btrfs_key *keys; - u64 *nodes; - int level; - int shared_level; - int lowest_level = 0; - int ret; - - if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) - lowest_level = ref_path->owner_objectid; - - if (!root->ref_cows) { - path->lowest_level = lowest_level; - ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); - BUG_ON(ret < 0); - path->lowest_level = 0; - btrfs_release_path(root, path); - return 0; - } - - mutex_lock(&root->fs_info->tree_reloc_mutex); - ret = init_reloc_tree(trans, root); - BUG_ON(ret); - reloc_root = root->reloc_root; - - shared_level = ref_path->shared_level; - ref_path->shared_level = BTRFS_MAX_LEVEL - 1; - - keys = ref_path->node_keys; - nodes = ref_path->new_nodes; - memset(&keys[shared_level + 1], 0, - sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1)); - memset(&nodes[shared_level + 1], 0, - sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1)); - - if (nodes[lowest_level] == 0) { - path->lowest_level = lowest_level; - ret = btrfs_search_slot(trans, reloc_root, first_key, path, - 0, 1); - BUG_ON(ret); - for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) { - eb = path->nodes[level]; - if (!eb || eb == reloc_root->node) - break; - nodes[level] = eb->start; - if (level == 0) - btrfs_item_key_to_cpu(eb, &keys[level], 0); - else - btrfs_node_key_to_cpu(eb, &keys[level], 0); - } - if (nodes[0] && - ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { - eb = path->nodes[0]; - ret = replace_extents_in_leaf(trans, reloc_root, eb, - group, reloc_inode); - BUG_ON(ret); - } - btrfs_release_path(reloc_root, path); - } else { - ret = btrfs_merge_path(trans, reloc_root, keys, nodes, - lowest_level); - BUG_ON(ret); - } - - /* - * replace tree blocks in the fs tree with tree blocks in - * the reloc tree. - */ - ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level); - BUG_ON(ret < 0); - - if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { - ret = btrfs_search_slot(trans, reloc_root, first_key, path, - 0, 0); - BUG_ON(ret); - extent_buffer_get(path->nodes[0]); - eb = path->nodes[0]; - btrfs_release_path(reloc_root, path); - ret = invalidate_extent_cache(reloc_root, eb, group, root); - BUG_ON(ret); - free_extent_buffer(eb); - } - - mutex_unlock(&root->fs_info->tree_reloc_mutex); - path->lowest_level = 0; - return 0; -} - -static noinline int relocate_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_key *first_key, - struct btrfs_ref_path *ref_path) -{ - int ret; - - ret = relocate_one_path(trans, root, path, first_key, - ref_path, NULL, NULL); - BUG_ON(ret); - - return 0; -} - -static noinline int del_extent_zero(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, - struct btrfs_path *path, - struct btrfs_key *extent_key) -{ - int ret; - - ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); - if (ret) - goto out; - ret = btrfs_del_item(trans, extent_root, path); -out: - btrfs_release_path(extent_root, path); - return ret; -} - -static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info, - struct btrfs_ref_path *ref_path) -{ - struct btrfs_key root_key; - - root_key.objectid = ref_path->root_objectid; - root_key.type = BTRFS_ROOT_ITEM_KEY; - if (is_cowonly_root(ref_path->root_objectid)) - root_key.offset = 0; - else - root_key.offset = (u64)-1; - - return btrfs_read_fs_root_no_name(fs_info, &root_key); -} - -static noinline int relocate_one_extent(struct btrfs_root *extent_root, - struct btrfs_path *path, - struct btrfs_key *extent_key, - struct btrfs_block_group_cache *group, - struct inode *reloc_inode, int pass) -{ - struct btrfs_trans_handle *trans; - struct btrfs_root *found_root; - struct btrfs_ref_path *ref_path = NULL; - struct disk_extent *new_extents = NULL; - int nr_extents = 0; - int loops; - int ret; - int level; - struct btrfs_key first_key; - u64 prev_block = 0; - - - trans = btrfs_start_transaction(extent_root, 1); - BUG_ON(IS_ERR(trans)); - - if (extent_key->objectid == 0) { - ret = del_extent_zero(trans, extent_root, path, extent_key); - goto out; - } - - ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS); - if (!ref_path) { - ret = -ENOMEM; - goto out; - } - - for (loops = 0; ; loops++) { - if (loops == 0) { - ret = btrfs_first_ref_path(trans, extent_root, ref_path, - extent_key->objectid); - } else { - ret = btrfs_next_ref_path(trans, extent_root, ref_path); - } - if (ret < 0) - goto out; - if (ret > 0) - break; - - if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID || - ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID) - continue; - - found_root = read_ref_root(extent_root->fs_info, ref_path); - BUG_ON(!found_root); - /* - * for reference counted tree, only process reference paths - * rooted at the latest committed root. - */ - if (found_root->ref_cows && - ref_path->root_generation != found_root->root_key.offset) - continue; - - if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { - if (pass == 0) { - /* - * copy data extents to new locations - */ - u64 group_start = group->key.objectid; - ret = relocate_data_extent(reloc_inode, - extent_key, - group_start); - if (ret < 0) - goto out; - break; - } - level = 0; - } else { - level = ref_path->owner_objectid; - } - - if (prev_block != ref_path->nodes[level]) { - struct extent_buffer *eb; - u64 block_start = ref_path->nodes[level]; - u64 block_size = btrfs_level_size(found_root, level); - - eb = read_tree_block(found_root, block_start, - block_size, 0); - if (!eb) { - ret = -EIO; - goto out; - } - btrfs_tree_lock(eb); - BUG_ON(level != btrfs_header_level(eb)); - - if (level == 0) - btrfs_item_key_to_cpu(eb, &first_key, 0); - else - btrfs_node_key_to_cpu(eb, &first_key, 0); - - btrfs_tree_unlock(eb); - free_extent_buffer(eb); - prev_block = block_start; - } - - mutex_lock(&extent_root->fs_info->trans_mutex); - btrfs_record_root_in_trans(found_root); - mutex_unlock(&extent_root->fs_info->trans_mutex); - if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { - /* - * try to update data extent references while - * keeping metadata shared between snapshots. - */ - if (pass == 1) { - ret = relocate_one_path(trans, found_root, - path, &first_key, ref_path, - group, reloc_inode); - if (ret < 0) - goto out; - continue; - } - /* - * use fallback method to process the remaining - * references. - */ - if (!new_extents) { - u64 group_start = group->key.objectid; - new_extents = kmalloc(sizeof(*new_extents), - GFP_NOFS); - if (!new_extents) { - ret = -ENOMEM; - goto out; - } - nr_extents = 1; - ret = get_new_locations(reloc_inode, - extent_key, - group_start, 1, - &new_extents, - &nr_extents); - if (ret) - goto out; - } - ret = replace_one_extent(trans, found_root, - path, extent_key, - &first_key, ref_path, - new_extents, nr_extents); - } else { - ret = relocate_tree_block(trans, found_root, path, - &first_key, ref_path); - } - if (ret < 0) - goto out; - } - ret = 0; -out: - btrfs_end_transaction(trans, extent_root); - kfree(new_extents); - kfree(ref_path); - return ret; -} -#endif - static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) { u64 num_devices; -- cgit v1.2.3 From 16cdcec736cd214350cdb591bf1091f8beedefa0 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 22 Apr 2011 18:12:22 +0800 Subject: btrfs: implement delayed inode items operation Changelog V5 -> V6: - Fix oom when the memory load is high, by storing the delayed nodes into the root's radix tree, and letting btrfs inodes go. Changelog V4 -> V5: - Fix the race on adding the delayed node to the inode, which is spotted by Chris Mason. - Merge Chris Mason's incremental patch into this patch. - Fix deadlock between readdir() and memory fault, which is reported by Itaru Kitayama. Changelog V3 -> V4: - Fix nested lock, which is reported by Itaru Kitayama, by updating space cache inode in time. Changelog V2 -> V3: - Fix the race between the delayed worker and the task which does delayed items balance, which is reported by Tsutomu Itoh. - Modify the patch address David Sterba's comment. - Fix the bug of the cpu recursion spinlock, reported by Chris Mason Changelog V1 -> V2: - break up the global rb-tree, use a list to manage the delayed nodes, which is created for every directory and file, and used to manage the delayed directory name index items and the delayed inode item. - introduce a worker to deal with the delayed nodes. Compare with Ext3/4, the performance of file creation and deletion on btrfs is very poor. the reason is that btrfs must do a lot of b+ tree insertions, such as inode item, directory name item, directory name index and so on. If we can do some delayed b+ tree insertion or deletion, we can improve the performance, so we made this patch which implemented delayed directory name index insertion/deletion and delayed inode update. Implementation: - introduce a delayed root object into the filesystem, that use two lists to manage the delayed nodes which are created for every file/directory. One is used to manage all the delayed nodes that have delayed items. And the other is used to manage the delayed nodes which is waiting to be dealt with by the work thread. - Every delayed node has two rb-tree, one is used to manage the directory name index which is going to be inserted into b+ tree, and the other is used to manage the directory name index which is going to be deleted from b+ tree. - introduce a worker to deal with the delayed operation. This worker is used to deal with the works of the delayed directory name index items insertion and deletion and the delayed inode update. When the delayed items is beyond the lower limit, we create works for some delayed nodes and insert them into the work queue of the worker, and then go back. When the delayed items is beyond the upper bound, we create works for all the delayed nodes that haven't been dealt with, and insert them into the work queue of the worker, and then wait for that the untreated items is below some threshold value. - When we want to insert a directory name index into b+ tree, we just add the information into the delayed inserting rb-tree. And then we check the number of the delayed items and do delayed items balance. (The balance policy is above.) - When we want to delete a directory name index from the b+ tree, we search it in the inserting rb-tree at first. If we look it up, just drop it. If not, add the key of it into the delayed deleting rb-tree. Similar to the delayed inserting rb-tree, we also check the number of the delayed items and do delayed items balance. (The same to inserting manipulation) - When we want to update the metadata of some inode, we cached the data of the inode into the delayed node. the worker will flush it into the b+ tree after dealing with the delayed insertion and deletion. - We will move the delayed node to the tail of the list after we access the delayed node, By this way, we can cache more delayed items and merge more inode updates. - If we want to commit transaction, we will deal with all the delayed node. - the delayed node will be freed when we free the btrfs inode. - Before we log the inode items, we commit all the directory name index items and the delayed inode update. I did a quick test by the benchmark tool[1] and found we can improve the performance of file creation by ~15%, and file deletion by ~20%. Before applying this patch: Create files: Total files: 50000 Total time: 1.096108 Average time: 0.000022 Delete files: Total files: 50000 Total time: 1.510403 Average time: 0.000030 After applying this patch: Create files: Total files: 50000 Total time: 0.932899 Average time: 0.000019 Delete files: Total files: 50000 Total time: 1.215732 Average time: 0.000024 [1] http://marc.info/?l=linux-btrfs&m=128212635122920&q=p3 Many thanks for Kitayama-san's help! Signed-off-by: Miao Xie Reviewed-by: David Sterba Tested-by: Tsutomu Itoh Tested-by: Itaru Kitayama Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9ee6bd55e16c..7b0433866f36 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3973,12 +3973,6 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) WARN_ON(fs_info->chunk_block_rsv.reserved > 0); } -static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items) -{ - return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * - 3 * num_items; -} - int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, struct btrfs_root *root, int num_items) @@ -3989,7 +3983,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, if (num_items == 0 || root->fs_info->chunk_root == root) return 0; - num_bytes = calc_trans_metadata_size(root, num_items); + num_bytes = btrfs_calc_trans_metadata_size(root, num_items); ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, num_bytes); if (!ret) { @@ -4028,14 +4022,14 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, * If all of the metadata space is used, we can commit * transaction and use space it freed. */ - u64 num_bytes = calc_trans_metadata_size(root, 4); + u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4); return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); } void btrfs_orphan_release_metadata(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; - u64 num_bytes = calc_trans_metadata_size(root, 4); + u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4); btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); } @@ -4049,7 +4043,7 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, * two for root back/forward refs, two for directory entries * and one for root of the snapshot. */ - u64 num_bytes = calc_trans_metadata_size(root, 5); + u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5); dst_rsv->space_info = src_rsv->space_info; return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); } @@ -4078,7 +4072,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) if (nr_extents > reserved_extents) { nr_extents -= reserved_extents; - to_reserve = calc_trans_metadata_size(root, nr_extents); + to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); } else { nr_extents = 0; to_reserve = 0; @@ -4132,7 +4126,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) to_free = calc_csum_metadata_size(inode, num_bytes); if (nr_extents > 0) - to_free += calc_trans_metadata_size(root, nr_extents); + to_free += btrfs_calc_trans_metadata_size(root, nr_extents); btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, to_free); -- cgit v1.2.3 From c4f675cd40d955d539180506c09515c90169b15b Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Fri, 20 May 2011 20:20:30 +0000 Subject: btrfs: don't spin in shrink_delalloc if there is nothing to free Observed as a large delay when --mixed filesystem is filled up. Test example: 1. create tiny --mixed FS: $ dd if=/dev/zero of=2G.img seek=$((2048 * 1024 * 1024 - 1)) count=1 bs=1 $ mkfs.btrfs --mixed 2G.img $ mount -oloop 2G.img /mnt/ut/ 2. Try to fill it up: $ dd if=/dev/urandom of=10M.file bs=10240 count=1024 $ seq 1 256 | while read file_no; do echo $file_no; time cp 10M.file ${file_no}.copy; done Up to '200.copy' it goes fast, but when disk fills-up each -ENOSPC message takes 3 seconds to pop-up _every_ ENOSPC (and in usermode linux it's even more: 30-60 seconds!). (Maybe, time depends on kernel's timer resolution). No IO, no CPU load, just rescheduling. Some debugging revealed busy spinning in shrink_delalloc. Signed-off-by: Sergei Trofimovich Reviewed-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9ee6bd55e16c..9f5fdd37451d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3425,6 +3425,10 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, if (reserved == 0) return 0; + /* nothing to shrink - nothing to reclaim */ + if (root->fs_info->delalloc_bytes == 0) + return 0; + max_reclaim = min(reserved, to_reclaim); while (loops < 1024) { -- cgit v1.2.3 From 1cd307990d6e2b4965620e339a92e0d7ae853e13 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Thu, 19 May 2011 05:19:08 +0000 Subject: Btrfs: BUG_ON is deleted from the caller of btrfs_truncate_item & btrfs_extend_item Currently, btrfs_truncate_item and btrfs_extend_item returns only 0. So, the check by BUG_ON in the caller is unnecessary. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9f5fdd37451d..103e141afeb3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -947,7 +947,6 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans, BUG_ON(ret); ret = btrfs_extend_item(trans, root, path, new_size); - BUG_ON(ret); leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); @@ -1555,7 +1554,6 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans, size = btrfs_extent_inline_ref_size(type); ret = btrfs_extend_item(trans, root, path, size); - BUG_ON(ret); ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(leaf, ei); @@ -1684,7 +1682,6 @@ int update_inline_extent_backref(struct btrfs_trans_handle *trans, end - ptr - size); item_size -= size; ret = btrfs_truncate_item(trans, root, path, item_size, 1); - BUG_ON(ret); } btrfs_mark_buffer_dirty(leaf); return 0; -- cgit v1.2.3