summaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/backref.c34
-rw-r--r--fs/btrfs/ctree.c10
-rw-r--r--fs/btrfs/ctree.h6
-rw-r--r--fs/btrfs/disk-io.c27
-rw-r--r--fs/btrfs/disk-io.h3
-rw-r--r--fs/btrfs/extent-tree.c25
-rw-r--r--fs/btrfs/ioctl.c19
-rw-r--r--fs/btrfs/send.c62
8 files changed, 156 insertions, 30 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 78556447e1d5..ef66db38cedb 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1454,8 +1454,8 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
* callers (such as fiemap) which want to know whether the extent is
* shared but do not need a ref count.
*
- * This attempts to allocate a transaction in order to account for
- * delayed refs, but continues on even when the alloc fails.
+ * This attempts to attach to the running transaction in order to account for
+ * delayed refs, but continues on even when no running transaction exists.
*
* Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
*/
@@ -1478,13 +1478,16 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
tmp = ulist_alloc(GFP_NOFS);
roots = ulist_alloc(GFP_NOFS);
if (!tmp || !roots) {
- ulist_free(tmp);
- ulist_free(roots);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
- trans = btrfs_join_transaction(root);
+ trans = btrfs_attach_transaction(root);
if (IS_ERR(trans)) {
+ if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
trans = NULL;
down_read(&fs_info->commit_root_sem);
} else {
@@ -1517,6 +1520,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
} else {
up_read(&fs_info->commit_root_sem);
}
+out:
ulist_free(tmp);
ulist_free(roots);
return ret;
@@ -1906,13 +1910,19 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
extent_item_objectid);
if (!search_commit_root) {
- trans = btrfs_join_transaction(fs_info->extent_root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
+ trans = btrfs_attach_transaction(fs_info->extent_root);
+ if (IS_ERR(trans)) {
+ if (PTR_ERR(trans) != -ENOENT &&
+ PTR_ERR(trans) != -EROFS)
+ return PTR_ERR(trans);
+ trans = NULL;
+ }
+ }
+
+ if (trans)
btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
- } else {
+ else
down_read(&fs_info->commit_root_sem);
- }
ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
tree_mod_seq_elem.seq, &refs,
@@ -1945,7 +1955,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
free_leaf_list(refs);
out:
- if (!search_commit_root) {
+ if (trans) {
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
btrfs_end_transaction(trans);
} else {
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 5a6c39b44c84..7672932aa5b4 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2401,6 +2401,16 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
if (tmp) {
/* first we do an atomic uptodate check */
if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
+ /*
+ * Do extra check for first_key, eb can be stale due to
+ * being cached, read from scrub, or have multiple
+ * parents (shared tree blocks).
+ */
+ if (btrfs_verify_level_key(fs_info, tmp,
+ parent_level - 1, &first_key, gen)) {
+ free_extent_buffer(tmp);
+ return -EUCLEAN;
+ }
*eb_ret = tmp;
return 0;
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 7a2a2621f0d9..9019265a2bf9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1316,6 +1316,12 @@ struct btrfs_root {
* manipulation with the read-only status via SUBVOL_SETFLAGS
*/
int send_in_progress;
+ /*
+ * Number of currently running deduplication operations that have a
+ * destination inode belonging to this root. Protected by the lock
+ * root_item_lock.
+ */
+ int dedupe_in_progress;
struct btrfs_subvolume_writers *subv_writers;
atomic_t will_be_snapshotted;
atomic_t snapshot_force_cow;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 888d72dda794..90a3c50d751b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -414,9 +414,9 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
return ret;
}
-static int verify_level_key(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb, int level,
- struct btrfs_key *first_key, u64 parent_transid)
+int btrfs_verify_level_key(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb, int level,
+ struct btrfs_key *first_key, u64 parent_transid)
{
int found_level;
struct btrfs_key found_key;
@@ -493,8 +493,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
if (verify_parent_transid(io_tree, eb,
parent_transid, 0))
ret = -EIO;
- else if (verify_level_key(fs_info, eb, level,
- first_key, parent_transid))
+ else if (btrfs_verify_level_key(fs_info, eb, level,
+ first_key, parent_transid))
ret = -EUCLEAN;
else
break;
@@ -1017,13 +1017,18 @@ void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr)
{
struct extent_buffer *buf = NULL;
struct inode *btree_inode = fs_info->btree_inode;
+ int ret;
buf = btrfs_find_create_tree_block(fs_info, bytenr);
if (IS_ERR(buf))
return;
- read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
- buf, WAIT_NONE, 0);
- free_extent_buffer(buf);
+
+ ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf,
+ WAIT_NONE, 0);
+ if (ret < 0)
+ free_extent_buffer_stale(buf);
+ else
+ free_extent_buffer(buf);
}
int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
@@ -1043,12 +1048,12 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK,
mirror_num);
if (ret) {
- free_extent_buffer(buf);
+ free_extent_buffer_stale(buf);
return ret;
}
if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
- free_extent_buffer(buf);
+ free_extent_buffer_stale(buf);
return -EIO;
} else if (extent_buffer_uptodate(buf)) {
*eb = buf;
@@ -1102,7 +1107,7 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
level, first_key);
if (ret) {
- free_extent_buffer(buf);
+ free_extent_buffer_stale(buf);
return ERR_PTR(ret);
}
return buf;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 987a64bc0c66..67a9fe2d29c7 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -39,6 +39,9 @@ static inline u64 btrfs_sb_offset(int mirror)
struct btrfs_device;
struct btrfs_fs_devices;
+int btrfs_verify_level_key(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb, int level,
+ struct btrfs_key *first_key, u64 parent_transid);
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 parent_transid, int level,
struct btrfs_key *first_key);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1b68700bc1c5..a19bbfce449e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -11192,9 +11192,9 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
* held back allocations.
*/
static int btrfs_trim_free_extents(struct btrfs_device *device,
- u64 minlen, u64 *trimmed)
+ struct fstrim_range *range, u64 *trimmed)
{
- u64 start = 0, len = 0;
+ u64 start = range->start, len = 0;
int ret;
*trimmed = 0;
@@ -11237,8 +11237,8 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
if (!trans)
up_read(&fs_info->commit_root_sem);
- ret = find_free_dev_extent_start(trans, device, minlen, start,
- &start, &len);
+ ret = find_free_dev_extent_start(trans, device, range->minlen,
+ start, &start, &len);
if (trans) {
up_read(&fs_info->commit_root_sem);
btrfs_put_transaction(trans);
@@ -11251,6 +11251,16 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
break;
}
+ /* If we are out of the passed range break */
+ if (start > range->start + range->len - 1) {
+ mutex_unlock(&fs_info->chunk_mutex);
+ ret = 0;
+ break;
+ }
+
+ start = max(range->start, start);
+ len = min(range->len, len);
+
ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
mutex_unlock(&fs_info->chunk_mutex);
@@ -11260,6 +11270,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
start += len;
*trimmed += bytes;
+ /* We've trimmed enough */
+ if (*trimmed >= range->len)
+ break;
+
if (fatal_signal_pending(current)) {
ret = -ERESTARTSYS;
break;
@@ -11343,8 +11357,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
mutex_lock(&fs_info->fs_devices->device_list_mutex);
devices = &fs_info->fs_devices->devices;
list_for_each_entry(device, devices, dev_list) {
- ret = btrfs_trim_free_extents(device, range->minlen,
- &group_trimmed);
+ ret = btrfs_trim_free_extents(device, range, &group_trimmed);
if (ret) {
dev_failed++;
dev_ret = ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 1d64a6b8e413..679303bf8e74 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3275,6 +3275,19 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
int ret;
int num_pages = PAGE_ALIGN(BTRFS_MAX_DEDUPE_LEN) >> PAGE_SHIFT;
u64 i, tail_len, chunk_count;
+ struct btrfs_root *root_dst = BTRFS_I(dst)->root;
+
+ spin_lock(&root_dst->root_item_lock);
+ if (root_dst->send_in_progress) {
+ btrfs_warn_rl(root_dst->fs_info,
+"cannot deduplicate to root %llu while send operations are using it (%d in progress)",
+ root_dst->root_key.objectid,
+ root_dst->send_in_progress);
+ spin_unlock(&root_dst->root_item_lock);
+ return -EAGAIN;
+ }
+ root_dst->dedupe_in_progress++;
+ spin_unlock(&root_dst->root_item_lock);
/* don't make the dst file partly checksummed */
if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
@@ -3293,7 +3306,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN,
dst, dst_loff);
if (ret)
- return ret;
+ goto out;
loff += BTRFS_MAX_DEDUPE_LEN;
dst_loff += BTRFS_MAX_DEDUPE_LEN;
@@ -3302,6 +3315,10 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
if (tail_len > 0)
ret = btrfs_extent_same_range(src, loff, tail_len, dst,
dst_loff);
+out:
+ spin_lock(&root_dst->root_item_lock);
+ root_dst->dedupe_in_progress--;
+ spin_unlock(&root_dst->root_item_lock);
return ret;
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 7ea2d6b1f170..19b00b1668ed 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -6579,6 +6579,38 @@ commit_trans:
return btrfs_commit_transaction(trans);
}
+/*
+ * Make sure any existing dellaloc is flushed for any root used by a send
+ * operation so that we do not miss any data and we do not race with writeback
+ * finishing and changing a tree while send is using the tree. This could
+ * happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and
+ * a send operation then uses the subvolume.
+ * After flushing delalloc ensure_commit_roots_uptodate() must be called.
+ */
+static int flush_delalloc_roots(struct send_ctx *sctx)
+{
+ struct btrfs_root *root = sctx->parent_root;
+ int ret;
+ int i;
+
+ if (root) {
+ ret = btrfs_start_delalloc_snapshot(root);
+ if (ret)
+ return ret;
+ btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
+ }
+
+ for (i = 0; i < sctx->clone_roots_cnt; i++) {
+ root = sctx->clone_roots[i].root;
+ ret = btrfs_start_delalloc_snapshot(root);
+ if (ret)
+ return ret;
+ btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
+ }
+
+ return 0;
+}
+
static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
{
spin_lock(&root->root_item_lock);
@@ -6594,6 +6626,13 @@ static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
spin_unlock(&root->root_item_lock);
}
+static void dedupe_in_progress_warn(const struct btrfs_root *root)
+{
+ btrfs_warn_rl(root->fs_info,
+"cannot use root %llu for send while deduplications on it are in progress (%d in progress)",
+ root->root_key.objectid, root->dedupe_in_progress);
+}
+
long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
{
int ret = 0;
@@ -6617,6 +6656,11 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
* making it RW. This also protects against deletion.
*/
spin_lock(&send_root->root_item_lock);
+ if (btrfs_root_readonly(send_root) && send_root->dedupe_in_progress) {
+ dedupe_in_progress_warn(send_root);
+ spin_unlock(&send_root->root_item_lock);
+ return -EAGAIN;
+ }
send_root->send_in_progress++;
spin_unlock(&send_root->root_item_lock);
@@ -6751,6 +6795,13 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
ret = -EPERM;
goto out;
}
+ if (clone_root->dedupe_in_progress) {
+ dedupe_in_progress_warn(clone_root);
+ spin_unlock(&clone_root->root_item_lock);
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+ ret = -EAGAIN;
+ goto out;
+ }
clone_root->send_in_progress++;
spin_unlock(&clone_root->root_item_lock);
srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -6785,6 +6836,13 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
ret = -EPERM;
goto out;
}
+ if (sctx->parent_root->dedupe_in_progress) {
+ dedupe_in_progress_warn(sctx->parent_root);
+ spin_unlock(&sctx->parent_root->root_item_lock);
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+ ret = -EAGAIN;
+ goto out;
+ }
spin_unlock(&sctx->parent_root->root_item_lock);
srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -6803,6 +6861,10 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
NULL);
sort_clone_roots = 1;
+ ret = flush_delalloc_roots(sctx);
+ if (ret)
+ goto out;
+
ret = ensure_commit_roots_uptodate(sctx);
if (ret)
goto out;