author    Igor Opaniuk <igor.opaniuk@toradex.com>    2020-11-17 11:31:57 +0200
committer Igor Opaniuk <igor.opaniuk@toradex.com>    2020-11-17 11:31:57 +0200
commit    664411bde9c033778f85f9ae3a74351406642f6a (patch)
tree      9e981b04895ab1b0fe6cbe6b5a89af256632fd80 /fs
parent    14655070177685c8b390e1caec15da757228be1a (diff)
parent    2544d06afd8d060f35b159809274e4b7477e63e8 (diff)
Merge tag 'v5.4.77' into toradex_5.4.y
This is the 5.4.77 stable release
Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/vfs_file.c | 4
-rw-r--r--  fs/btrfs/ctree.c | 6
-rw-r--r--  fs/btrfs/ctree.h | 2
-rw-r--r--  fs/btrfs/delayed-inode.c | 3
-rw-r--r--  fs/btrfs/dev-replace.c | 7
-rw-r--r--  fs/btrfs/reada.c | 47
-rw-r--r--  fs/btrfs/send.c | 201
-rw-r--r--  fs/btrfs/tree-checker.c | 35
-rw-r--r--  fs/btrfs/tree-log.c | 8
-rw-r--r--  fs/btrfs/volumes.c | 17
-rw-r--r--  fs/btrfs/volumes.h | 1
-rw-r--r--  fs/buffer.c | 16
-rw-r--r--  fs/cachefiles/rdwr.c | 3
-rw-r--r--  fs/ceph/addr.c | 2
-rw-r--r--  fs/cifs/inode.c | 13
-rw-r--r--  fs/exec.c | 17
-rw-r--r--  fs/ext4/inode.c | 11
-rw-r--r--  fs/ext4/resize.c | 4
-rw-r--r--  fs/ext4/super.c | 6
-rw-r--r--  fs/f2fs/checkpoint.c | 10
-rw-r--r--  fs/f2fs/dir.c | 8
-rw-r--r--  fs/f2fs/f2fs.h | 2
-rw-r--r--  fs/f2fs/node.c | 2
-rw-r--r--  fs/f2fs/segment.c | 12
-rw-r--r--  fs/gfs2/glock.c | 3
-rw-r--r--  fs/gfs2/incore.h | 1
-rw-r--r--  fs/gfs2/ops_fstype.c | 40
-rw-r--r--  fs/gfs2/super.c | 1
-rw-r--r--  fs/gfs2/sys.c | 5
-rw-r--r--  fs/nfs/namespace.c | 12
-rw-r--r--  fs/nfs/nfs4_fs.h | 8
-rw-r--r--  fs/nfs/nfs4file.c | 3
-rw-r--r--  fs/nfs/nfs4proc.c | 90
-rw-r--r--  fs/nfs/nfs4trace.h | 1
-rw-r--r--  fs/nfsd/nfsproc.c | 16
-rw-r--r--  fs/ubifs/debug.c | 1
-rw-r--r--  fs/ubifs/journal.c | 6
-rw-r--r--  fs/ubifs/orphan.c | 2
-rw-r--r--  fs/ubifs/super.c | 44
-rw-r--r--  fs/ubifs/tnc.c | 3
-rw-r--r--  fs/ubifs/xattr.c | 2
-rw-r--r--  fs/udf/super.c | 21
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.c | 19
-rw-r--r--  fs/xfs/xfs_ioctl.c | 26
-rw-r--r--  fs/xfs/xfs_rtalloc.c | 10
45 files changed, 552 insertions, 199 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index fe7f0bd2048e..ee9cabac1204 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -609,9 +609,9 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
struct writeback_control wbc = {
.nr_to_write = LONG_MAX,
.sync_mode = WB_SYNC_ALL,
- .range_start = vma->vm_pgoff * PAGE_SIZE,
+ .range_start = (loff_t)vma->vm_pgoff * PAGE_SIZE,
/* absolute end, byte at end included */
- .range_end = vma->vm_pgoff * PAGE_SIZE +
+ .range_end = (loff_t)vma->vm_pgoff * PAGE_SIZE +
(vma->vm_end - vma->vm_start - 1),
};
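
Editor's note, not part of the patch: the cast added here (and the matching one in the fs/ceph/addr.c hunk further down) widens vm_pgoff to loff_t before multiplying by the page size, so the byte offset cannot wrap when the multiplication would otherwise happen in 32-bit arithmetic. A minimal userspace sketch of the same overflow, using made-up values and forcing 32-bit math to show the difference:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical mapping: page index for a byte offset of 5 GiB
	 * with 4 KiB pages (0x140000 * 4096 == 0x140000000). */
	uint32_t pgoff = 0x140000u;

	uint32_t wrapped = pgoff * 4096u;           /* 32-bit math wraps */
	int64_t widened = (int64_t)pgoff * 4096;    /* widen first, as the patch does */

	printf("wrapped: 0x%x, widened: 0x%llx\n",
	       wrapped, (unsigned long long)widened);
	return 0;
}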
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c05127f50637..e25133a9e9df 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1103,6 +1103,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
if (ret) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -1110,6 +1112,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
ret = btrfs_reloc_cow_block(trans, root, buf, cow);
if (ret) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -1142,6 +1146,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
if (last_ref) {
ret = tree_mod_log_free_eb(buf);
if (ret) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
btrfs_abort_transaction(trans, ret);
return ret;
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 23b4f38e2392..27128164fac9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3404,6 +3404,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
int btrfs_reada_wait(void *handle);
void btrfs_reada_detach(void *handle);
int btree_readahead_hook(struct extent_buffer *eb, int err);
+void btrfs_reada_remove_dev(struct btrfs_device *dev);
+void btrfs_reada_undo_remove_dev(struct btrfs_device *dev);
static inline int is_fstree(u64 rootid)
{
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index a34ee9c2f315..bef62b01824d 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -627,8 +627,7 @@ static int btrfs_delayed_inode_reserve_metadata(
*/
if (!src_rsv || (!trans->bytes_reserved &&
src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
- ret = btrfs_qgroup_reserve_meta_prealloc(root,
- fs_info->nodesize, true);
+ ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
if (ret < 0)
return ret;
ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 196bd241e701..96843934dcbb 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -190,7 +190,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
int ret = 0;
*device_out = NULL;
- if (fs_info->fs_devices->seeding) {
+ if (srcdev->fs_devices->seeding) {
btrfs_err(fs_info, "the filesystem is a seed filesystem!");
return -EINVAL;
}
@@ -631,6 +631,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
}
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
+ if (!scrub_ret)
+ btrfs_reada_remove_dev(src_device);
+
/*
* We have to use this loop approach because at this point src_device
* has to be available for transaction commit to complete, yet new
@@ -639,6 +642,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
while (1) {
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
+ btrfs_reada_undo_remove_dev(src_device);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return PTR_ERR(trans);
}
@@ -689,6 +693,7 @@ error:
up_write(&dev_replace->rwsem);
mutex_unlock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ btrfs_reada_undo_remove_dev(src_device);
btrfs_rm_dev_replace_blocked(fs_info);
if (tgt_device)
btrfs_destroy_dev_replace_tgtdev(tgt_device);
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 1feaeadc8cf5..2656dc8de99c 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -421,6 +421,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
if (!dev->bdev)
continue;
+ if (test_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state))
+ continue;
+
if (dev_replace_is_ongoing &&
dev == fs_info->dev_replace.tgtdev) {
/*
@@ -445,6 +448,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
}
have_zone = 1;
}
+ if (!have_zone)
+ radix_tree_delete(&fs_info->reada_tree, index);
spin_unlock(&fs_info->reada_lock);
up_read(&fs_info->dev_replace.rwsem);
@@ -1012,3 +1017,45 @@ void btrfs_reada_detach(void *handle)
kref_put(&rc->refcnt, reada_control_release);
}
+
+/*
+ * Before removing a device (device replace or device remove ioctls), call this
+ * function to wait for all existing readahead requests on the device and to
+ * make sure no one queues more readahead requests for the device.
+ *
+ * Must not be called while holding the device list mutex or the device
+ * replace semaphore, otherwise it will deadlock.
+ */
+void btrfs_reada_remove_dev(struct btrfs_device *dev)
+{
+ struct btrfs_fs_info *fs_info = dev->fs_info;
+
+ /* Serialize with readahead extent creation at reada_find_extent(). */
+ spin_lock(&fs_info->reada_lock);
+ set_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state);
+ spin_unlock(&fs_info->reada_lock);
+
+ /*
+ * There might be readahead requests added to the radix trees which
+ * were not yet added to the readahead work queue. We need to start
+ * them and wait for their completion, otherwise we can end up with
+ * use-after-free problems when dropping the last reference on the
+ * readahead extents and their zones, as they need to access the
+ * device structure.
+ */
+ reada_start_machine(fs_info);
+ btrfs_flush_workqueue(fs_info->readahead_workers);
+}
+
+/*
+ * If when removing a device (device replace or device remove ioctls) an error
+ * happens after calling btrfs_reada_remove_dev(), call this to undo what that
+ * function did. This is safe to call even if btrfs_reada_remove_dev() was not
+ * called before.
+ */
+void btrfs_reada_undo_remove_dev(struct btrfs_device *dev)
+{
+ spin_lock(&dev->fs_info->reada_lock);
+ clear_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state);
+ spin_unlock(&dev->fs_info->reada_lock);
+}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index b0e5dfb9be7a..88940f494428 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -3813,6 +3813,72 @@ static int update_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
}
/*
+ * When processing the new references for an inode we may orphanize an existing
+ * directory inode because its old name conflicts with one of the new references
+ * of the current inode. Later, when processing another new reference of our
+ * inode, we might need to orphanize another inode, but the path we have in the
+ * reference reflects the pre-orphanization name of the directory we previously
+ * orphanized. For example:
+ *
+ * parent snapshot looks like:
+ *
+ * . (ino 256)
+ * |----- f1 (ino 257)
+ * |----- f2 (ino 258)
+ * |----- d1/ (ino 259)
+ * |----- d2/ (ino 260)
+ *
+ * send snapshot looks like:
+ *
+ * . (ino 256)
+ * |----- d1 (ino 258)
+ * |----- f2/ (ino 259)
+ * |----- f2_link/ (ino 260)
+ * | |----- f1 (ino 257)
+ * |
+ * |----- d2 (ino 258)
+ *
+ * When processing inode 257 we compute the name for inode 259 as "d1", and we
+ * cache it in the name cache. Later when we start processing inode 258, when
+ * collecting all its new references we set a full path of "d1/d2" for its new
+ * reference with name "d2". When we start processing the new references we
+ * start by processing the new reference with name "d1", and this results in
+ * orphanizing inode 259, since its old reference causes a conflict. Then we
+ * move on the next new reference, with name "d2", and we find out we must
+ * orphanize inode 260, as its old reference conflicts with ours - but for the
+ * orphanization we use a source path corresponding to the path we stored in the
+ * new reference, which is "d1/d2" and not "o259-6-0/d2" - this makes the
+ * receiver fail since the path component "d1/" no longer exists, it was renamed
+ * to "o259-6-0/" when processing the previous new reference. So in this case we
+ * must recompute the path in the new reference and use it for the new
+ * orphanization operation.
+ */
+static int refresh_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
+{
+ char *name;
+ int ret;
+
+ name = kmemdup(ref->name, ref->name_len, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ fs_path_reset(ref->full_path);
+ ret = get_cur_path(sctx, ref->dir, ref->dir_gen, ref->full_path);
+ if (ret < 0)
+ goto out;
+
+ ret = fs_path_add(ref->full_path, name, ref->name_len);
+ if (ret < 0)
+ goto out;
+
+ /* Update the reference's base name pointer. */
+ set_ref_path(ref, ref->full_path);
+out:
+ kfree(name);
+ return ret;
+}
+
+/*
* This does all the move/link/unlink/rmdir magic.
*/
static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
@@ -3880,52 +3946,56 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
goto out;
}
+ /*
+ * Before doing any rename and link operations, do a first pass on the
+ * new references to orphanize any unprocessed inodes that may have a
+ * reference that conflicts with one of the new references of the current
+ * inode. This needs to happen first because a new reference may conflict
+ * with the old reference of a parent directory, so we must make sure
+ * that the path used for link and rename commands don't use an
+ * orphanized name when an ancestor was not yet orphanized.
+ *
+ * Example:
+ *
+ * Parent snapshot:
+ *
+ * . (ino 256)
+ * |----- testdir/ (ino 259)
+ * | |----- a (ino 257)
+ * |
+ * |----- b (ino 258)
+ *
+ * Send snapshot:
+ *
+ * . (ino 256)
+ * |----- testdir_2/ (ino 259)
+ * | |----- a (ino 260)
+ * |
+ * |----- testdir (ino 257)
+ * |----- b (ino 257)
+ * |----- b2 (ino 258)
+ *
+ * Processing the new reference for inode 257 with name "b" may happen
+ * before processing the new reference with name "testdir". If so, we
+ * must make sure that by the time we send a link command to create the
+ * hard link "b", inode 259 was already orphanized, since the generated
+ * path in "valid_path" already contains the orphanized name for 259.
+ * We are processing inode 257, so only later when processing 259 we do
+ * the rename operation to change its temporary (orphanized) name to
+ * "testdir_2".
+ */
list_for_each_entry(cur, &sctx->new_refs, list) {
- /*
- * We may have refs where the parent directory does not exist
- * yet. This happens if the parent directories inum is higher
- * than the current inum. To handle this case, we create the
- * parent directory out of order. But we need to check if this
- * did already happen before due to other refs in the same dir.
- */
ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
if (ret < 0)
goto out;
- if (ret == inode_state_will_create) {
- ret = 0;
- /*
- * First check if any of the current inodes refs did
- * already create the dir.
- */
- list_for_each_entry(cur2, &sctx->new_refs, list) {
- if (cur == cur2)
- break;
- if (cur2->dir == cur->dir) {
- ret = 1;
- break;
- }
- }
-
- /*
- * If that did not happen, check if a previous inode
- * did already create the dir.
- */
- if (!ret)
- ret = did_create_dir(sctx, cur->dir);
- if (ret < 0)
- goto out;
- if (!ret) {
- ret = send_create_inode(sctx, cur->dir);
- if (ret < 0)
- goto out;
- }
- }
+ if (ret == inode_state_will_create)
+ continue;
/*
- * Check if this new ref would overwrite the first ref of
- * another unprocessed inode. If yes, orphanize the
- * overwritten inode. If we find an overwritten ref that is
- * not the first ref, simply unlink it.
+ * Check if this new ref would overwrite the first ref of another
+ * unprocessed inode. If yes, orphanize the overwritten inode.
+ * If we find an overwritten ref that is not the first ref,
+ * simply unlink it.
*/
ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
cur->name, cur->name_len,
@@ -3942,6 +4012,12 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
struct name_cache_entry *nce;
struct waiting_dir_move *wdm;
+ if (orphanized_dir) {
+ ret = refresh_ref_path(sctx, cur);
+ if (ret < 0)
+ goto out;
+ }
+
ret = orphanize_inode(sctx, ow_inode, ow_gen,
cur->full_path);
if (ret < 0)
@@ -4004,6 +4080,49 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
}
}
+ }
+
+ list_for_each_entry(cur, &sctx->new_refs, list) {
+ /*
+ * We may have refs where the parent directory does not exist
+ * yet. This happens if the parent directories inum is higher
+ * than the current inum. To handle this case, we create the
+ * parent directory out of order. But we need to check if this
+ * did already happen before due to other refs in the same dir.
+ */
+ ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
+ if (ret < 0)
+ goto out;
+ if (ret == inode_state_will_create) {
+ ret = 0;
+ /*
+ * First check if any of the current inodes refs did
+ * already create the dir.
+ */
+ list_for_each_entry(cur2, &sctx->new_refs, list) {
+ if (cur == cur2)
+ break;
+ if (cur2->dir == cur->dir) {
+ ret = 1;
+ break;
+ }
+ }
+
+ /*
+ * If that did not happen, check if a previous inode
+ * did already create the dir.
+ */
+ if (!ret)
+ ret = did_create_dir(sctx, cur->dir);
+ if (ret < 0)
+ goto out;
+ if (!ret) {
+ ret = send_create_inode(sctx, cur->dir);
+ if (ret < 0)
+ goto out;
+ }
+ }
+
if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
if (ret < 0)
@@ -7233,7 +7352,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1);
- sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL);
+ sctx->clone_roots = kvzalloc(alloc_size, GFP_KERNEL);
if (!sctx->clone_roots) {
ret = -ENOMEM;
goto out;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 84b8d6ebf98f..48e46323d519 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -577,18 +577,36 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf,
u64 type;
u64 features;
bool mixed = false;
+ int raid_index;
+ int nparity;
+ int ncopies;
length = btrfs_chunk_length(leaf, chunk);
stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
type = btrfs_chunk_type(leaf, chunk);
+ raid_index = btrfs_bg_flags_to_raid_index(type);
+ ncopies = btrfs_raid_array[raid_index].ncopies;
+ nparity = btrfs_raid_array[raid_index].nparity;
if (!num_stripes) {
chunk_err(leaf, chunk, logical,
"invalid chunk num_stripes, have %u", num_stripes);
return -EUCLEAN;
}
+ if (num_stripes < ncopies) {
+ chunk_err(leaf, chunk, logical,
+ "invalid chunk num_stripes < ncopies, have %u < %d",
+ num_stripes, ncopies);
+ return -EUCLEAN;
+ }
+ if (nparity && num_stripes == nparity) {
+ chunk_err(leaf, chunk, logical,
+ "invalid chunk num_stripes == nparity, have %u == %d",
+ num_stripes, nparity);
+ return -EUCLEAN;
+ }
if (!IS_ALIGNED(logical, fs_info->sectorsize)) {
chunk_err(leaf, chunk, logical,
"invalid chunk logical, have %llu should aligned to %u",
@@ -869,7 +887,7 @@ static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key,
int slot)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
- struct btrfs_root_item ri;
+ struct btrfs_root_item ri = { 0 };
const u64 valid_root_flags = BTRFS_ROOT_SUBVOL_RDONLY |
BTRFS_ROOT_SUBVOL_DEAD;
@@ -889,14 +907,21 @@ static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key,
return -EUCLEAN;
}
- if (btrfs_item_size_nr(leaf, slot) != sizeof(ri)) {
+ if (btrfs_item_size_nr(leaf, slot) != sizeof(ri) &&
+ btrfs_item_size_nr(leaf, slot) != btrfs_legacy_root_item_size()) {
generic_err(leaf, slot,
- "invalid root item size, have %u expect %zu",
- btrfs_item_size_nr(leaf, slot), sizeof(ri));
+ "invalid root item size, have %u expect %zu or %u",
+ btrfs_item_size_nr(leaf, slot), sizeof(ri),
+ btrfs_legacy_root_item_size());
}
+ /*
+ * For legacy root item, the members starting at generation_v2 will be
+ * all filled with 0.
+ * And since we allow generation_v2 as 0, it will still pass the check.
+ */
read_extent_buffer(leaf, &ri, btrfs_item_ptr_offset(leaf, slot),
- sizeof(ri));
+ btrfs_item_size_nr(leaf, slot));
/* Generation related */
if (btrfs_root_generation(&ri) >
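
Editor's note, not part of the patch: the new num_stripes checks above require every chunk to carry at least as many stripes as data copies, and a parity profile to have at least one stripe beyond its parity stripes. A standalone sketch of the same validation, with a hypothetical two-entry table standing in for btrfs_raid_array (values are illustrative):

#include <stdio.h>

struct raid_attr { const char *name; int ncopies; int nparity; };

static int chunk_stripes_valid(const struct raid_attr *r, int num_stripes)
{
	if (num_stripes < r->ncopies)
		return 0;                 /* fewer stripes than data copies */
	if (r->nparity && num_stripes == r->nparity)
		return 0;                 /* only parity stripes, no data stripe */
	return 1;
}

int main(void)
{
	struct raid_attr mirror = { "mirror-like", 2, 0 };
	struct raid_attr parity = { "parity-like", 1, 1 };

	printf("%d\n", chunk_stripes_valid(&mirror, 1));   /* 0: invalid */
	printf("%d\n", chunk_stripes_valid(&parity, 1));   /* 0: invalid */
	printf("%d\n", chunk_stripes_valid(&parity, 2));   /* 1: valid */
	return 0;
}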
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 7042b84edc89..de53e5166997 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3639,6 +3639,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
* search and this search we'll not find the key again and can just
* bail.
*/
+search:
ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
if (ret != 0)
goto done;
@@ -3658,6 +3659,13 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
if (min_key.objectid != ino || min_key.type != key_type)
goto done;
+
+ if (need_resched()) {
+ btrfs_release_path(path);
+ cond_resched();
+ goto search;
+ }
+
ret = overwrite_item(trans, log, dst_path, src, i,
&min_key);
if (ret) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e798caee978e..58910a0a3e4a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1123,16 +1123,18 @@ static noinline struct btrfs_device *device_list_add(const char *path,
bdput(path_bdev);
mutex_unlock(&fs_devices->device_list_mutex);
btrfs_warn_in_rcu(device->fs_info,
- "duplicate device fsid:devid for %pU:%llu old:%s new:%s",
- disk_super->fsid, devid,
- rcu_str_deref(device->name), path);
+ "duplicate device %s devid %llu generation %llu scanned by %s (%d)",
+ path, devid, found_transid,
+ current->comm,
+ task_pid_nr(current));
return ERR_PTR(-EEXIST);
}
bdput(path_bdev);
btrfs_info_in_rcu(device->fs_info,
- "device fsid %pU devid %llu moved old:%s new:%s",
- disk_super->fsid, devid,
- rcu_str_deref(device->name), path);
+ "devid %llu device path %s changed to %s scanned by %s (%d)",
+ devid, rcu_str_deref(device->name),
+ path, current->comm,
+ task_pid_nr(current));
}
name = rcu_string_strdup(path, GFP_NOFS);
@@ -2206,6 +2208,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
mutex_unlock(&uuid_mutex);
ret = btrfs_shrink_device(device, 0);
+ if (!ret)
+ btrfs_reada_remove_dev(device);
mutex_lock(&uuid_mutex);
if (ret)
goto error_undo;
@@ -2292,6 +2296,7 @@ out:
return ret;
error_undo:
+ btrfs_reada_undo_remove_dev(device);
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
mutex_lock(&fs_info->chunk_mutex);
list_add(&device->dev_alloc_list,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5acf5c507ec2..aa6a6d7b2978 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -56,6 +56,7 @@ struct btrfs_io_geometry {
#define BTRFS_DEV_STATE_MISSING (2)
#define BTRFS_DEV_STATE_REPLACE_TGT (3)
#define BTRFS_DEV_STATE_FLUSH_SENT (4)
+#define BTRFS_DEV_STATE_NO_READA (5)
struct btrfs_device {
struct list_head dev_list; /* device_list_mutex */
diff --git a/fs/buffer.c b/fs/buffer.c
index 22d8ac4a8c40..0d7bd7712076 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2739,16 +2739,6 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & (PAGE_SIZE-1);
if (page->index >= end_index+1 || !offset) {
- /*
- * The page may have dirty, unmapped buffers. For example,
- * they may have been added in ext3_writepage(). Make them
- * freeable here, so the page does not leak.
- */
-#if 0
- /* Not really sure about this - do we need this ? */
- if (page->mapping->a_ops->invalidatepage)
- page->mapping->a_ops->invalidatepage(page, offset);
-#endif
unlock_page(page);
return 0; /* don't care */
}
@@ -2943,12 +2933,6 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & (PAGE_SIZE-1);
if (page->index >= end_index+1 || !offset) {
- /*
- * The page may have dirty, unmapped buffers. For example,
- * they may have been added in ext3_writepage(). Make them
- * freeable here, so the page does not leak.
- */
- do_invalidatepage(page, 0, PAGE_SIZE);
unlock_page(page);
return 0; /* don't care */
}
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index ad057ed2b30b..bd5fe8d00d00 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -121,7 +121,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object,
_debug("reissue read");
ret = bmapping->a_ops->readpage(NULL, backpage);
if (ret < 0)
- goto unlock_discard;
+ goto discard;
}
/* but the page may have been read before the monitor was installed, so
@@ -138,6 +138,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object,
unlock_discard:
unlock_page(backpage);
+discard:
spin_lock_irq(&object->work_lock);
list_del(&monitor->op_link);
spin_unlock_irq(&object->work_lock);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 7ab616601141..a02e845eb0fb 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1427,7 +1427,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
struct page *pinned_page = NULL;
- loff_t off = vmf->pgoff << PAGE_SHIFT;
+ loff_t off = (loff_t)vmf->pgoff << PAGE_SHIFT;
int want, got, err;
sigset_t oldset;
vm_fault_t ret = VM_FAULT_SIGBUS;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 17df90b5f57a..fd9e289f3e72 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2614,13 +2614,18 @@ cifs_setattr(struct dentry *direntry, struct iattr *attrs)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
struct cifs_tcon *pTcon = cifs_sb_master_tcon(cifs_sb);
+ int rc, retries = 0;
- if (pTcon->unix_ext)
- return cifs_setattr_unix(direntry, attrs);
-
- return cifs_setattr_nounix(direntry, attrs);
+ do {
+ if (pTcon->unix_ext)
+ rc = cifs_setattr_unix(direntry, attrs);
+ else
+ rc = cifs_setattr_nounix(direntry, attrs);
+ retries++;
+ } while (is_retryable_error(rc) && retries < 2);
/* BB: add cifs_setattr_legacy for really old servers */
+ return rc;
}
#if 0
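
Editor's note, not part of the patch: the cifs_setattr() change above retries the operation once when the underlying call reports a retryable error (for example, while a session reconnect is in progress). A small userspace sketch of the same retry shape; is_retryable() and do_setattr() are stand-ins made up for illustration:

#include <stdio.h>

static int attempts;

static int is_retryable(int rc)
{
	return rc == -11;             /* stand-in for -EAGAIN-style errors */
}

static int do_setattr(void)
{
	/* Pretend the first attempt races with a reconnect. */
	return ++attempts == 1 ? -11 : 0;
}

int main(void)
{
	int rc, retries = 0;

	do {
		rc = do_setattr();
		retries++;
	} while (is_retryable(rc) && retries < 2);

	printf("rc=%d after %d attempt(s)\n", rc, retries);
	return 0;
}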
diff --git a/fs/exec.c b/fs/exec.c
index de833553ae27..2441eb1a1e2d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1044,11 +1044,24 @@ static int exec_mmap(struct mm_struct *mm)
}
task_lock(tsk);
- active_mm = tsk->active_mm;
membarrier_exec_mmap(mm);
- tsk->mm = mm;
+
+ local_irq_disable();
+ active_mm = tsk->active_mm;
tsk->active_mm = mm;
+ tsk->mm = mm;
+ /*
+ * This prevents preemption while active_mm is being loaded and
+ * it and mm are being updated, which could cause problems for
+ * lazy tlb mm refcounting when these are updated by context
+ * switches. Not all architectures can handle irqs off over
+ * activate_mm yet.
+ */
+ if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
+ local_irq_enable();
activate_mm(active_mm, mm);
+ if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
+ local_irq_enable();
tsk->mm->vmacache_seqnum = 0;
vmacache_flush(tsk);
task_unlock(tsk);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 95a8a04c77dd..cbd028a31daf 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5271,6 +5271,12 @@ static int ext4_do_update_inode(handle_t *handle,
if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
+ err = ext4_inode_blocks_set(handle, raw_inode, ei);
+ if (err) {
+ spin_unlock(&ei->i_raw_lock);
+ goto out_brelse;
+ }
+
raw_inode->i_mode = cpu_to_le16(inode->i_mode);
i_uid = i_uid_read(inode);
i_gid = i_gid_read(inode);
@@ -5304,11 +5310,6 @@ static int ext4_do_update_inode(handle_t *handle,
EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
- err = ext4_inode_blocks_set(handle, raw_inode, ei);
- if (err) {
- spin_unlock(&ei->i_raw_lock);
- goto out_brelse;
- }
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
if (likely(!test_opt2(inode->i_sb, HURD_COMPAT)))
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 080e25f6ef56..ad1d4c8faf44 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -861,8 +861,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
BUFFER_TRACE(dind, "get_write_access");
err = ext4_journal_get_write_access(handle, dind);
- if (unlikely(err))
+ if (unlikely(err)) {
ext4_std_error(sb, err);
+ goto errout;
+ }
/* ext4_reserve_inode_write() gets a reference on the iloc */
err = ext4_reserve_inode_write(handle, inode, &iloc);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4aae7e3e89a1..6a260cc8bce6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4684,6 +4684,7 @@ cantfind_ext4:
failed_mount8:
ext4_unregister_sysfs(sb);
+ kobject_put(&sbi->s_kobj);
failed_mount7:
ext4_unregister_li_request(sb);
failed_mount6:
@@ -5856,6 +5857,11 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
/* Quotafile not on the same filesystem? */
if (path->dentry->d_sb != sb)
return -EXDEV;
+
+ /* Quota already enabled for this file? */
+ if (IS_NOQUOTA(d_inode(path->dentry)))
+ return -EBUSY;
+
/* Journaling quota? */
if (EXT4_SB(sb)->s_qf_names[type]) {
/* Quotafile not in fs root? */
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index bbd07fe8a492..c966ccc44c15 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -108,7 +108,7 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
return __get_meta_page(sbi, index, true);
}
-struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
+struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index)
{
struct page *page;
int count = 0;
@@ -243,6 +243,8 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
blkno * NAT_ENTRY_PER_BLOCK);
break;
case META_SIT:
+ if (unlikely(blkno >= TOTAL_SEGS(sbi)))
+ goto out;
/* get sit block addr */
fio.new_blkaddr = current_sit_addr(sbi,
blkno * SIT_ENTRY_PER_BLOCK);
@@ -1044,8 +1046,12 @@ int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
get_pages(sbi, is_dir ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
retry:
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi))) {
+ trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
+ get_pages(sbi, is_dir ?
+ F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
return -EIO;
+ }
spin_lock(&sbi->inode_lock[type]);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index e9af46dc06f7..78d041f9775a 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -303,16 +303,15 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
unsigned int max_depth;
unsigned int level;
+ *res_page = NULL;
+
if (f2fs_has_inline_dentry(dir)) {
- *res_page = NULL;
de = f2fs_find_in_inline_dir(dir, fname, res_page);
goto out;
}
- if (npages == 0) {
- *res_page = NULL;
+ if (npages == 0)
goto out;
- }
max_depth = F2FS_I(dir)->i_current_depth;
if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
@@ -323,7 +322,6 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
}
for (level = 0; level < max_depth; level++) {
- *res_page = NULL;
de = find_in_level(dir, level, fname, res_page);
if (de || IS_ERR(*res_page))
break;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b3b7e63394be..63440abe58c4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3149,7 +3149,7 @@ enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
-struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index);
+struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index ed12e9668184..2a4a382f28fe 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -109,7 +109,7 @@ static void clear_node_page_dirty(struct page *page)
static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
- return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid));
+ return f2fs_get_meta_page(sbi, current_nat_addr(sbi, nid));
}
static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 7d8578401267..5ba677f85533 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2310,7 +2310,9 @@ int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
*/
struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
- return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
+ if (unlikely(f2fs_cp_error(sbi)))
+ return ERR_PTR(-EIO);
+ return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
}
void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
@@ -2582,7 +2584,11 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
__next_free_blkoff(sbi, curseg, 0);
sum_page = f2fs_get_sum_page(sbi, new_segno);
- f2fs_bug_on(sbi, IS_ERR(sum_page));
+ if (IS_ERR(sum_page)) {
+ /* GC won't be able to use stale summary pages by cp_error */
+ memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
+ return;
+ }
sum_node = (struct f2fs_summary_block *)page_address(sum_page);
memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
f2fs_put_page(sum_page, 1);
@@ -3713,7 +3719,7 @@ int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
unsigned int segno)
{
- return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
+ return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
}
static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 0290a22ebccf..9e1685a30bf8 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -873,7 +873,8 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
out_free:
kfree(gl->gl_lksb.sb_lvbptr);
kmem_cache_free(cachep, gl);
- atomic_dec(&sdp->sd_glock_disposal);
+ if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+ wake_up(&sdp->sd_glock_wait);
out:
return ret;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 5f89c515f5bb..33a6b074209d 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -694,6 +694,7 @@ struct gfs2_sbd {
struct super_block *sd_vfs;
struct gfs2_pcpu_lkstats __percpu *sd_lkstats;
struct kobject sd_kobj;
+ struct completion sd_kobj_unregister;
unsigned long sd_flags; /* SDF_... */
struct gfs2_sb_host sd_sb;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e0c55765b06d..29b27d769860 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -169,15 +169,19 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
return -EINVAL;
}
- /* If format numbers match exactly, we're done. */
-
- if (sb->sb_fs_format == GFS2_FORMAT_FS &&
- sb->sb_multihost_format == GFS2_FORMAT_MULTI)
- return 0;
+ if (sb->sb_fs_format != GFS2_FORMAT_FS ||
+ sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
+ fs_warn(sdp, "Unknown on-disk format, unable to mount\n");
+ return -EINVAL;
+ }
- fs_warn(sdp, "Unknown on-disk format, unable to mount\n");
+ if (sb->sb_bsize < 512 || sb->sb_bsize > PAGE_SIZE ||
+ (sb->sb_bsize & (sb->sb_bsize - 1))) {
+ pr_warn("Invalid superblock size\n");
+ return -EINVAL;
+ }
- return -EINVAL;
+ return 0;
}
static void end_bio_io_page(struct bio *bio)
@@ -1094,26 +1098,14 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
}
error = init_names(sdp, silent);
- if (error) {
- /* In this case, we haven't initialized sysfs, so we have to
- manually free the sdp. */
- free_sbd(sdp);
- sb->s_fs_info = NULL;
- return error;
- }
+ if (error)
+ goto fail_free;
snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s", sdp->sd_table_name);
error = gfs2_sys_fs_add(sdp);
- /*
- * If we hit an error here, gfs2_sys_fs_add will have called function
- * kobject_put which causes the sysfs usage count to go to zero, which
- * causes sysfs to call function gfs2_sbd_release, which frees sdp.
- * Subsequent error paths here will call gfs2_sys_fs_del, which also
- * kobject_put to free sdp.
- */
if (error)
- return error;
+ goto fail_free;
gfs2_create_debugfs_file(sdp);
@@ -1210,9 +1202,9 @@ fail_lm:
gfs2_lm_unmount(sdp);
fail_debug:
gfs2_delete_debugfs_file(sdp);
- /* gfs2_sys_fs_del must be the last thing we do, since it causes
- * sysfs to call function gfs2_sbd_release, which frees sdp. */
gfs2_sys_fs_del(sdp);
+fail_free:
+ free_sbd(sdp);
sb->s_fs_info = NULL;
return error;
}
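
Editor's note, not part of the patch: the stricter gfs2_check_sb() above now also rejects superblocks whose block size is not a power of two between 512 bytes and PAGE_SIZE; (bsize & (bsize - 1)) == 0 is the usual power-of-two test. An illustrative standalone version of that predicate:

#include <stdio.h>

static int bsize_valid(unsigned int bsize, unsigned int page_size)
{
	return bsize >= 512 && bsize <= page_size &&
	       (bsize & (bsize - 1)) == 0;
}

int main(void)
{
	printf("%d %d %d\n",
	       bsize_valid(4096, 4096),   /* 1: valid */
	       bsize_valid(3000, 4096),   /* 0: not a power of two */
	       bsize_valid(256, 4096));   /* 0: smaller than 512 */
	return 0;
}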
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 5fa1eec4fb4f..5935ce5ae563 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -695,6 +695,7 @@ restart:
/* At this point, we're through participating in the lockspace */
gfs2_sys_fs_del(sdp);
+ free_sbd(sdp);
}
/**
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index dd15b8e4af2c..1c6e52dc878e 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -302,7 +302,7 @@ static void gfs2_sbd_release(struct kobject *kobj)
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
- free_sbd(sdp);
+ complete(&sdp->sd_kobj_unregister);
}
static struct kobj_type gfs2_ktype = {
@@ -652,6 +652,7 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
sprintf(ro, "RDONLY=%d", sb_rdonly(sb));
sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
+ init_completion(&sdp->sd_kobj_unregister);
sdp->sd_kobj.kset = gfs2_kset;
error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL,
"%s", sdp->sd_table_name);
@@ -682,6 +683,7 @@ fail_tune:
fail_reg:
fs_err(sdp, "error %d adding sysfs files\n", error);
kobject_put(&sdp->sd_kobj);
+ wait_for_completion(&sdp->sd_kobj_unregister);
sb->s_fs_info = NULL;
return error;
}
@@ -692,6 +694,7 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
sysfs_remove_group(&sdp->sd_kobj, &tune_group);
sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
kobject_put(&sdp->sd_kobj);
+ wait_for_completion(&sdp->sd_kobj_unregister);
}
static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 9287eb666322..2db17fdf516b 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -31,9 +31,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
/*
* nfs_path - reconstruct the path given an arbitrary dentry
* @base - used to return pointer to the end of devname part of path
- * @dentry - pointer to dentry
+ * @dentry_in - pointer to dentry
* @buffer - result buffer
- * @buflen - length of buffer
+ * @buflen_in - length of buffer
* @flags - options (see below)
*
* Helper function for constructing the server pathname
@@ -48,15 +48,19 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
* the original device (export) name
* (if unset, the original name is returned verbatim)
*/
-char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen,
- unsigned flags)
+char *nfs_path(char **p, struct dentry *dentry_in, char *buffer,
+ ssize_t buflen_in, unsigned flags)
{
char *end;
int namelen;
unsigned seq;
const char *base;
+ struct dentry *dentry;
+ ssize_t buflen;
rename_retry:
+ buflen = buflen_in;
+ dentry = dentry_in;
end = buffer+buflen;
*--end = '\0';
buflen--;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index bb322d9de313..c4a98cbda6dd 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -570,6 +570,14 @@ static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stat
return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0;
}
+static inline bool nfs4_stateid_is_next(const nfs4_stateid *s1, const nfs4_stateid *s2)
+{
+ u32 seq1 = be32_to_cpu(s1->seqid);
+ u32 seq2 = be32_to_cpu(s2->seqid);
+
+ return seq2 == seq1 + 1U || (seq2 == 1U && seq1 == 0xffffffffU);
+}
+
static inline void nfs4_stateid_seqid_inc(nfs4_stateid *s1)
{
u32 seqid = be32_to_cpu(s1->seqid);
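
Editor's note, not part of the patch: the new nfs4_stateid_is_next() helper treats a stateid as the direct successor when its seqid is exactly one greater, with 0xffffffff wrapping back to 1 (a seqid of 0 is never a valid successor). A tiny standalone sketch of the same comparison:

#include <stdio.h>
#include <stdint.h>

static int seqid_is_next(uint32_t cur, uint32_t next)
{
	return next == cur + 1u || (next == 1u && cur == 0xffffffffu);
}

int main(void)
{
	printf("%d\n", seqid_is_next(5, 6));            /* 1 */
	printf("%d\n", seqid_is_next(0xffffffffu, 1));  /* 1: wraparound */
	printf("%d\n", seqid_is_next(5, 7));            /* 0: a seqid was skipped */
	return 0;
}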
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 534b6fd70ffd..6b31cb5f9c9d 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -138,7 +138,8 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
/* Only offload copy if superblock is the same */
if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
return -EXDEV;
- if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY))
+ if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY) ||
+ !nfs_server_capable(file_inode(file_in), NFS_CAP_COPY))
return -EOPNOTSUPP;
if (file_inode(file_in) == file_inode(file_out))
return -EOPNOTSUPP;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 00435556db0c..ddc900df461c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1515,19 +1515,6 @@ static void nfs_state_log_update_open_stateid(struct nfs4_state *state)
wake_up_all(&state->waitq);
}
-static void nfs_state_log_out_of_order_open_stateid(struct nfs4_state *state,
- const nfs4_stateid *stateid)
-{
- u32 state_seqid = be32_to_cpu(state->open_stateid.seqid);
- u32 stateid_seqid = be32_to_cpu(stateid->seqid);
-
- if (stateid_seqid == state_seqid + 1U ||
- (stateid_seqid == 1U && state_seqid == 0xffffffffU))
- nfs_state_log_update_open_stateid(state);
- else
- set_bit(NFS_STATE_CHANGE_WAIT, &state->flags);
-}
-
static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state)
{
struct nfs_client *clp = state->owner->so_server->nfs_client;
@@ -1553,21 +1540,19 @@ static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state)
* i.e. The stateid seqids have to be initialised to 1, and
* are then incremented on every state transition.
*/
-static bool nfs_need_update_open_stateid(struct nfs4_state *state,
+static bool nfs_stateid_is_sequential(struct nfs4_state *state,
const nfs4_stateid *stateid)
{
- if (test_bit(NFS_OPEN_STATE, &state->flags) == 0 ||
- !nfs4_stateid_match_other(stateid, &state->open_stateid)) {
+ if (test_bit(NFS_OPEN_STATE, &state->flags)) {
+ /* The common case - we're updating to a new sequence number */
+ if (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
+ nfs4_stateid_is_next(&state->open_stateid, stateid)) {
+ return true;
+ }
+ } else {
+ /* This is the first OPEN in this generation */
if (stateid->seqid == cpu_to_be32(1))
- nfs_state_log_update_open_stateid(state);
- else
- set_bit(NFS_STATE_CHANGE_WAIT, &state->flags);
- return true;
- }
-
- if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
- nfs_state_log_out_of_order_open_stateid(state, stateid);
- return true;
+ return true;
}
return false;
}
@@ -1641,16 +1626,16 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state,
int status = 0;
for (;;) {
- if (!nfs_need_update_open_stateid(state, stateid))
- return;
- if (!test_bit(NFS_STATE_CHANGE_WAIT, &state->flags))
+ if (nfs_stateid_is_sequential(state, stateid))
break;
+
if (status)
break;
/* Rely on seqids for serialisation with NFSv4.0 */
if (!nfs4_has_session(NFS_SERVER(state->inode)->nfs_client))
break;
+ set_bit(NFS_STATE_CHANGE_WAIT, &state->flags);
prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE);
/*
* Ensure we process the state changes in the same order
@@ -1661,6 +1646,7 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state,
spin_unlock(&state->owner->so_lock);
rcu_read_unlock();
trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0);
+
if (!signal_pending(current)) {
if (schedule_timeout(5*HZ) == 0)
status = -EAGAIN;
@@ -3397,7 +3383,8 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst,
__be32 seqid_open;
u32 dst_seqid;
bool ret;
- int seq;
+ int seq, status = -EAGAIN;
+ DEFINE_WAIT(wait);
for (;;) {
ret = false;
@@ -3409,15 +3396,41 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst,
continue;
break;
}
+
+ write_seqlock(&state->seqlock);
seqid_open = state->open_stateid.seqid;
- if (read_seqretry(&state->seqlock, seq))
- continue;
dst_seqid = be32_to_cpu(dst->seqid);
- if ((s32)(dst_seqid - be32_to_cpu(seqid_open)) >= 0)
- dst->seqid = cpu_to_be32(dst_seqid + 1);
- else
+
+ /* Did another OPEN bump the state's seqid? try again: */
+ if ((s32)(be32_to_cpu(seqid_open) - dst_seqid) > 0) {
dst->seqid = seqid_open;
+ write_sequnlock(&state->seqlock);
+ ret = true;
+ break;
+ }
+
+ /* server says we're behind but we haven't seen the update yet */
+ set_bit(NFS_STATE_CHANGE_WAIT, &state->flags);
+ prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE);
+ write_sequnlock(&state->seqlock);
+ trace_nfs4_close_stateid_update_wait(state->inode, dst, 0);
+
+ if (signal_pending(current))
+ status = -EINTR;
+ else
+ if (schedule_timeout(5*HZ) != 0)
+ status = 0;
+
+ finish_wait(&state->waitq, &wait);
+
+ if (!status)
+ continue;
+ if (status == -EINTR)
+ break;
+
+ /* we slept the whole 5 seconds, we must have lost a seqid */
+ dst->seqid = cpu_to_be32(dst_seqid + 1);
ret = true;
break;
}
@@ -7846,9 +7859,11 @@ int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name,
* both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or
* DS flags set.
*/
-static int nfs4_check_cl_exchange_flags(u32 flags)
+static int nfs4_check_cl_exchange_flags(u32 flags, u32 version)
{
- if (flags & ~EXCHGID4_FLAG_MASK_R)
+ if (version >= 2 && (flags & ~EXCHGID4_2_FLAG_MASK_R))
+ goto out_inval;
+ else if (version < 2 && (flags & ~EXCHGID4_FLAG_MASK_R))
goto out_inval;
if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) &&
(flags & EXCHGID4_FLAG_USE_NON_PNFS))
@@ -8261,7 +8276,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre
if (status != 0)
goto out;
- status = nfs4_check_cl_exchange_flags(resp->flags);
+ status = nfs4_check_cl_exchange_flags(resp->flags,
+ clp->cl_mvops->minor_version);
if (status != 0)
goto out;
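
Editor's note, not part of the patch: the EXCHANGE_ID change above picks the flag mask to validate against based on the minor version, since NFSv4.2 defines flag bits that earlier minor versions must reject. A rough sketch of that mask selection; the mask values below are made up for illustration and are not the real EXCHGID4 constants:

#include <stdio.h>
#include <stdint.h>

#define FLAG_MASK_R     0x0000000fu
#define FLAG_MASK_R_V2  0x0000001fu   /* hypothetical: one extra bit allowed from v4.2 on */

static int check_flags(uint32_t flags, uint32_t minor_version)
{
	uint32_t mask = minor_version >= 2 ? FLAG_MASK_R_V2 : FLAG_MASK_R;

	return (flags & ~mask) ? -1 : 0;  /* any bit outside the mask is invalid */
}

int main(void)
{
	printf("%d\n", check_flags(0x10, 1));  /* -1: bit not allowed before v4.2 */
	printf("%d\n", check_flags(0x10, 2));  /*  0: allowed from v4.2 on */
	return 0;
}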
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 9398c0b6e0a3..2295a934a154 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -1291,6 +1291,7 @@ DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_setattr);
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_delegreturn);
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update);
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update_wait);
+DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_close_stateid_update_wait);
DECLARE_EVENT_CLASS(nfs4_getattr_event,
TP_PROTO(
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index c83ddac22f38..754c763374dd 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -118,6 +118,13 @@ done:
return nfsd_return_attrs(nfserr, resp);
}
+/* Obsolete, replaced by MNTPROC_MNT. */
+static __be32
+nfsd_proc_root(struct svc_rqst *rqstp)
+{
+ return nfs_ok;
+}
+
/*
* Look up a path name component
* Note: the dentry in the resp->fh may be negative if the file
@@ -203,6 +210,13 @@ nfsd_proc_read(struct svc_rqst *rqstp)
return fh_getattr(&resp->fh, &resp->stat);
}
+/* Reserved */
+static __be32
+nfsd_proc_writecache(struct svc_rqst *rqstp)
+{
+ return nfs_ok;
+}
+
/*
* Write data to a file
* N.B. After this call resp->fh needs an fh_put
@@ -617,6 +631,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_xdrressize = ST+AT,
},
[NFSPROC_ROOT] = {
+ .pc_func = nfsd_proc_root,
.pc_decode = nfssvc_decode_void,
.pc_encode = nfssvc_encode_void,
.pc_argsize = sizeof(struct nfsd_void),
@@ -654,6 +669,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4,
},
[NFSPROC_WRITECACHE] = {
+ .pc_func = nfsd_proc_writecache,
.pc_decode = nfssvc_decode_void,
.pc_encode = nfssvc_encode_void,
.pc_argsize = sizeof(struct nfsd_void),
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index e4b52783819d..992b74f9c941 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -1123,6 +1123,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
err = PTR_ERR(dent);
if (err == -ENOENT)
break;
+ kfree(pdent);
return err;
}
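
Editor's note, not part of the patch: this kfree(pdent), and the similar kfree(pxent) additions in the UBIFS hunks that follow, plug leaks in iteration loops that keep the previously returned entry alive as the key for the next lookup, so every early-exit path has to free that saved entry too. A rough userspace sketch of the pattern, with a made-up next_entry() standing in for ubifs_tnc_next_ent():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *next_entry(int step, int *err)
{
	static const char *names[] = { "user.a", "user.b" };

	if (step == 2) {                /* pretend the third lookup fails */
		*err = -5;              /* stand-in for an I/O error */
		return NULL;
	}
	return strdup(names[step]);
}

int main(void)
{
	char *xent, *pxent = NULL;
	int step = 0, err = 0;

	while (1) {
		xent = next_entry(step++, &err);
		if (!xent) {
			free(pxent);    /* the added free: don't leak the saved entry */
			printf("stopped with err=%d\n", err);
			return 0;
		}
		printf("visiting %s\n", xent);
		free(pxent);            /* previous entry is no longer needed */
		pxent = xent;
	}
}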
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 5f2ac5ef0891..f78c3e3ef931 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -894,6 +894,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
if (err == -ENOENT)
break;
+ kfree(pxent);
goto out_release;
}
@@ -906,6 +907,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
ubifs_err(c, "dead directory entry '%s', error %d",
xent->name, err);
ubifs_ro_mode(c, err);
+ kfree(pxent);
kfree(xent);
goto out_release;
}
@@ -936,8 +938,6 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
inode->i_ino);
release_head(c, BASEHD);
- ubifs_add_auth_dirt(c, lnum);
-
if (last_reference) {
err = ubifs_tnc_remove_ino(c, inode->i_ino);
if (err)
@@ -947,6 +947,8 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
} else {
union ubifs_key key;
+ ubifs_add_auth_dirt(c, lnum);
+
ino_key_init(c, &key, inode->i_ino);
err = ubifs_tnc_add(c, &key, lnum, offs, ilen, hash);
}
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 283f9eb48410..b0117878b3a0 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -173,6 +173,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
err = PTR_ERR(xent);
if (err == -ENOENT)
break;
+ kfree(pxent);
return err;
}
@@ -182,6 +183,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
xattr_orphan = orphan_add(c, xattr_inum, orphan);
if (IS_ERR(xattr_orphan)) {
+ kfree(pxent);
kfree(xent);
return PTR_ERR(xattr_orphan);
}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 7fc2f3f07c16..e49bd69dfc1c 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1092,14 +1092,20 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
break;
}
case Opt_auth_key:
- c->auth_key_name = kstrdup(args[0].from, GFP_KERNEL);
- if (!c->auth_key_name)
- return -ENOMEM;
+ if (!is_remount) {
+ c->auth_key_name = kstrdup(args[0].from,
+ GFP_KERNEL);
+ if (!c->auth_key_name)
+ return -ENOMEM;
+ }
break;
case Opt_auth_hash_name:
- c->auth_hash_name = kstrdup(args[0].from, GFP_KERNEL);
- if (!c->auth_hash_name)
- return -ENOMEM;
+ if (!is_remount) {
+ c->auth_hash_name = kstrdup(args[0].from,
+ GFP_KERNEL);
+ if (!c->auth_hash_name)
+ return -ENOMEM;
+ }
break;
case Opt_ignore:
break;
@@ -1123,6 +1129,18 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
return 0;
}
+/*
+ * ubifs_release_options - release mount parameters which have been dumped.
+ * @c: UBIFS file-system description object
+ */
+static void ubifs_release_options(struct ubifs_info *c)
+{
+ kfree(c->auth_key_name);
+ c->auth_key_name = NULL;
+ kfree(c->auth_hash_name);
+ c->auth_hash_name = NULL;
+}
+
/**
* destroy_journal - destroy journal data structures.
* @c: UBIFS file-system description object
@@ -1295,7 +1313,7 @@ static int mount_ubifs(struct ubifs_info *c)
err = ubifs_read_superblock(c);
if (err)
- goto out_free;
+ goto out_auth;
c->probing = 0;
@@ -1307,18 +1325,18 @@ static int mount_ubifs(struct ubifs_info *c)
ubifs_err(c, "'compressor \"%s\" is not compiled in",
ubifs_compr_name(c, c->default_compr));
err = -ENOTSUPP;
- goto out_free;
+ goto out_auth;
}
err = init_constants_sb(c);
if (err)
- goto out_free;
+ goto out_auth;
sz = ALIGN(c->max_idx_node_sz, c->min_io_size) * 2;
c->cbuf = kmalloc(sz, GFP_NOFS);
if (!c->cbuf) {
err = -ENOMEM;
- goto out_free;
+ goto out_auth;
}
err = alloc_wbufs(c);
@@ -1593,6 +1611,8 @@ out_wbufs:
free_wbufs(c);
out_cbuf:
kfree(c->cbuf);
+out_auth:
+ ubifs_exit_authentication(c);
out_free:
kfree(c->write_reserve_buf);
kfree(c->bu.buf);
@@ -1632,8 +1652,7 @@ static void ubifs_umount(struct ubifs_info *c)
ubifs_lpt_free(c, 0);
ubifs_exit_authentication(c);
- kfree(c->auth_key_name);
- kfree(c->auth_hash_name);
+ ubifs_release_options(c);
kfree(c->cbuf);
kfree(c->rcvrd_mst_node);
kfree(c->mst_node);
@@ -2201,6 +2220,7 @@ out_umount:
out_unlock:
mutex_unlock(&c->umount_mutex);
out_close:
+ ubifs_release_options(c);
ubi_close_volume(c->ubi);
out:
return err;
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index e8e7b0e9532e..33742ee3945b 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -2885,6 +2885,7 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum)
err = PTR_ERR(xent);
if (err == -ENOENT)
break;
+ kfree(pxent);
return err;
}
@@ -2898,6 +2899,7 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum)
fname_len(&nm) = le16_to_cpu(xent->nlen);
err = ubifs_tnc_remove_nm(c, &key1, &nm);
if (err) {
+ kfree(pxent);
kfree(xent);
return err;
}
@@ -2906,6 +2908,7 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum)
highest_ino_key(c, &key2, xattr_inum);
err = ubifs_tnc_remove_range(c, &key1, &key2);
if (err) {
+ kfree(pxent);
kfree(xent);
return err;
}
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 9aefbb60074f..a0b9b349efe6 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -522,6 +522,7 @@ int ubifs_purge_xattrs(struct inode *host)
xent->name, err);
ubifs_ro_mode(c, err);
kfree(pxent);
+ kfree(xent);
return err;
}
@@ -531,6 +532,7 @@ int ubifs_purge_xattrs(struct inode *host)
err = remove_xattr(c, host, xino, &nm);
if (err) {
kfree(pxent);
+ kfree(xent);
iput(xino);
ubifs_err(c, "cannot remove xattr, error %d", err);
return err;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index a0cd766b41cd..4aba4878ed96 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1703,7 +1703,8 @@ static noinline int udf_process_sequence(
"Pointers (max %u supported)\n",
UDF_MAX_TD_NESTING);
brelse(bh);
- return -EIO;
+ ret = -EIO;
+ goto out;
}
vdp = (struct volDescPtr *)bh->b_data;
@@ -1723,7 +1724,8 @@ static noinline int udf_process_sequence(
curr = get_volume_descriptor_record(ident, bh, &data);
if (IS_ERR(curr)) {
brelse(bh);
- return PTR_ERR(curr);
+ ret = PTR_ERR(curr);
+ goto out;
}
/* Descriptor we don't care about? */
if (!curr)
@@ -1745,28 +1747,31 @@ static noinline int udf_process_sequence(
*/
if (!data.vds[VDS_POS_PRIMARY_VOL_DESC].block) {
udf_err(sb, "Primary Volume Descriptor not found!\n");
- return -EAGAIN;
+ ret = -EAGAIN;
+ goto out;
}
ret = udf_load_pvoldesc(sb, data.vds[VDS_POS_PRIMARY_VOL_DESC].block);
if (ret < 0)
- return ret;
+ goto out;
if (data.vds[VDS_POS_LOGICAL_VOL_DESC].block) {
ret = udf_load_logicalvol(sb,
data.vds[VDS_POS_LOGICAL_VOL_DESC].block,
fileset);
if (ret < 0)
- return ret;
+ goto out;
}
/* Now handle prevailing Partition Descriptors */
for (i = 0; i < data.num_part_descs; i++) {
ret = udf_load_partdesc(sb, data.part_descs_loc[i].rec.block);
if (ret < 0)
- return ret;
+ goto out;
}
-
- return 0;
+ ret = 0;
+out:
+ kfree(data.part_descs_loc);
+ return ret;
}
/*
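
Editor's note, not part of the patch: the udf_process_sequence() change above routes every exit through a single out label so the part_descs_loc allocation is always freed. A small standalone sketch of that goto-based cleanup pattern, with made-up names and error values:

#include <stdio.h>
#include <stdlib.h>

static int process(int fail_early)
{
	int ret = 0;
	int *descs = calloc(16, sizeof(*descs));   /* stands in for part_descs_loc */

	if (!descs)
		return -12;                        /* out of memory */

	if (fail_early) {
		ret = -5;                          /* e.g. the nesting-depth check */
		goto out;
	}

	/* ... normal descriptor processing would go here ... */
out:
	free(descs);                               /* freed on every path */
	return ret;
}

int main(void)
{
	printf("%d %d\n", process(1), process(0));  /* -5 0 */
	return 0;
}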
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index f8db3fe616df..c114d24be619 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4985,20 +4985,25 @@ xfs_bmap_del_extent_real(
flags = XFS_ILOG_CORE;
if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
- xfs_fsblock_t bno;
xfs_filblks_t len;
xfs_extlen_t mod;
- bno = div_u64_rem(del->br_startblock, mp->m_sb.sb_rextsize,
- &mod);
- ASSERT(mod == 0);
len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize,
&mod);
ASSERT(mod == 0);
- error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
- if (error)
- goto done;
+ if (!(bflags & XFS_BMAPI_REMAP)) {
+ xfs_fsblock_t bno;
+
+ bno = div_u64_rem(del->br_startblock,
+ mp->m_sb.sb_rextsize, &mod);
+ ASSERT(mod == 0);
+
+ error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
+ if (error)
+ goto done;
+ }
+
do_fx = 0;
nblks = len * mp->m_sb.sb_rextsize;
qfield = XFS_TRANS_DQ_RTBCOUNT;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index bf0435dbec43..b3021d9b34a5 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -622,7 +622,6 @@ xfs_ioc_space(
error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
if (error)
goto out_unlock;
- inode_dio_wait(inode);
switch (bf->l_whence) {
case 0: /*SEEK_SET*/
@@ -668,6 +667,31 @@ xfs_ioc_space(
goto out_unlock;
}
+ /*
+ * Must wait for all AIO to complete before we continue as AIO can
+ * change the file size on completion without holding any locks we
+ * currently hold. We must do this first because AIO can update both
+ * the on disk and in memory inode sizes, and the operations that follow
+ * require the in-memory size to be fully up-to-date.
+ */
+ inode_dio_wait(inode);
+
+ /*
+ * Now that AIO and DIO has drained we can flush and (if necessary)
+ * invalidate the cached range over the first operation we are about to
+ * run. We include zero range here because it starts with a hole punch
+ * over the target range.
+ */
+ switch (cmd) {
+ case XFS_IOC_ZERO_RANGE:
+ case XFS_IOC_UNRESVSP:
+ case XFS_IOC_UNRESVSP64:
+ error = xfs_flush_unmap_range(ip, bf->l_start, bf->l_len);
+ if (error)
+ goto out_unlock;
+ break;
+ }
+
switch (cmd) {
case XFS_IOC_ZERO_RANGE:
flags |= XFS_PREALLOC_SET;
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index b58366937082..6d5ddc4e5135 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1021,10 +1021,13 @@ xfs_growfs_rt(
xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
/*
- * Update the bitmap inode's size.
+ * Update the bitmap inode's size ondisk and incore. We need
+ * to update the incore size so that inode inactivation won't
+ * punch what it thinks are "posteof" blocks.
*/
mp->m_rbmip->i_d.di_size =
nsbp->sb_rbmblocks * nsbp->sb_blocksize;
+ i_size_write(VFS_I(mp->m_rbmip), mp->m_rbmip->i_d.di_size);
xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
/*
* Get the summary inode into the transaction.
@@ -1032,9 +1035,12 @@ xfs_growfs_rt(
xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
/*
- * Update the summary inode's size.
+ * Update the summary inode's size. We need to update the
+ * incore size so that inode inactivation won't punch what it
+ * thinks are "posteof" blocks.
*/
mp->m_rsumip->i_d.di_size = nmp->m_rsumsize;
+ i_size_write(VFS_I(mp->m_rsumip), mp->m_rsumip->i_d.di_size);
xfs_trans_log_inode(tp, mp->m_rsumip, XFS_ILOG_CORE);
/*
* Copy summary data from old to new sizes.