summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c2
-rw-r--r--fs/binfmt_flat.c23
-rw-r--r--fs/btrfs/dev-replace.c26
-rw-r--r--fs/btrfs/reada.c5
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/btrfs/volumes.h5
-rw-r--r--fs/cifs/smb2maperror.c2
-rw-r--r--fs/configfs/dir.c31
-rw-r--r--fs/crypto/policy.c2
-rw-r--r--fs/dax.c2
-rw-r--r--fs/f2fs/debug.c19
-rw-r--r--fs/f2fs/f2fs.h16
-rw-r--r--fs/f2fs/inode.c5
-rw-r--r--fs/f2fs/node.c20
-rw-r--r--fs/f2fs/recovery.c10
-rw-r--r--fs/f2fs/segment.c9
-rw-r--r--fs/f2fs/segment.h3
-rw-r--r--fs/f2fs/super.c5
-rw-r--r--fs/f2fs/xattr.c36
-rw-r--r--fs/f2fs/xattr.h2
-rw-r--r--fs/fat/file.c11
-rw-r--r--fs/fuse/dev.c2
-rw-r--r--fs/inode.c9
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c2
-rw-r--r--fs/nfs/nfs4proc.c20
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/nfsd/nfs4xdr.c4
-rw-r--r--fs/nfsd/vfs.h5
-rw-r--r--fs/ocfs2/dcache.c12
-rw-r--r--fs/ocfs2/filecheck.c1
-rw-r--r--fs/overlayfs/file.c170
-rw-r--r--fs/overlayfs/inode.c60
-rw-r--r--fs/overlayfs/namei.c8
-rw-r--r--fs/overlayfs/overlayfs.h3
-rw-r--r--fs/overlayfs/ovl_entry.h6
-rw-r--r--fs/overlayfs/super.c161
-rw-r--r--fs/overlayfs/util.c12
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/quota/dquot.c4
-rw-r--r--fs/timerfd.c5
-rw-r--r--fs/udf/inode.c93
-rw-r--r--fs/userfaultfd.c42
42 files changed, 668 insertions, 191 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 082d227fa56b..6261719f6f2a 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -276,7 +276,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
switch (handler->flags) {
case ACL_TYPE_ACCESS:
if (acl) {
- struct iattr iattr;
+ struct iattr iattr = { 0 };
struct posix_acl *old_acl = acl;
retval = posix_acl_update_mode(inode, &iattr.ia_mode, &acl);
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 82a48e830018..e4b59e76afb0 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -856,9 +856,14 @@ err:
static int load_flat_shared_library(int id, struct lib_info *libs)
{
+ /*
+ * This is a fake bprm struct; only the members "buf", "file" and
+ * "filename" are actually used.
+ */
struct linux_binprm bprm;
int res;
char buf[16];
+ loff_t pos = 0;
memset(&bprm, 0, sizeof(bprm));
@@ -872,25 +877,11 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
if (IS_ERR(bprm.file))
return res;
- bprm.cred = prepare_exec_creds();
- res = -ENOMEM;
- if (!bprm.cred)
- goto out;
-
- /* We don't really care about recalculating credentials at this point
- * as we're past the point of no return and are dealing with shared
- * libraries.
- */
- bprm.called_set_creds = 1;
+ res = kernel_read(bprm.file, bprm.buf, BINPRM_BUF_SIZE, &pos);
- res = prepare_binprm(&bprm);
-
- if (!res)
+ if (res >= 0)
res = load_flat_file(&bprm, libs, id, NULL);
- abort_creds(bprm.cred);
-
-out:
allow_write_access(bprm.file);
fput(bprm.file);
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 8fed470bb7e1..23b13fbecdc2 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -599,17 +599,25 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
}
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans)) {
- mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
- return PTR_ERR(trans);
+ while (1) {
+ trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
+ return PTR_ERR(trans);
+ }
+ ret = btrfs_commit_transaction(trans);
+ WARN_ON(ret);
+ /* keep away write_all_supers() during the finishing procedure */
+ mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ mutex_lock(&fs_info->chunk_mutex);
+ if (src_device->has_pending_chunks) {
+ mutex_unlock(&root->fs_info->chunk_mutex);
+ mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+ } else {
+ break;
+ }
}
- ret = btrfs_commit_transaction(trans);
- WARN_ON(ret);
- /* keep away write_all_supers() during the finishing procedure */
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
- mutex_lock(&fs_info->chunk_mutex);
btrfs_dev_replace_write_lock(dev_replace);
dev_replace->replace_state =
scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index dec14b739b10..859274e38417 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -745,6 +745,7 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
u64 total = 0;
int i;
+again:
do {
enqueued = 0;
mutex_lock(&fs_devices->device_list_mutex);
@@ -756,6 +757,10 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
mutex_unlock(&fs_devices->device_list_mutex);
total += enqueued;
} while (enqueued && total < 10000);
+ if (fs_devices->seed) {
+ fs_devices = fs_devices->seed;
+ goto again;
+ }
if (enqueued == 0)
return;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 207f4e87445d..2fd000308be7 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4873,6 +4873,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
for (i = 0; i < map->num_stripes; i++) {
num_bytes = map->stripes[i].dev->bytes_used + stripe_size;
btrfs_device_set_bytes_used(map->stripes[i].dev, num_bytes);
+ map->stripes[i].dev->has_pending_chunks = true;
}
atomic64_sub(stripe_size * map->num_stripes, &info->free_chunk_space);
@@ -7348,6 +7349,7 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans)
for (i = 0; i < map->num_stripes; i++) {
dev = map->stripes[i].dev;
dev->commit_bytes_used = dev->bytes_used;
+ dev->has_pending_chunks = false;
}
}
mutex_unlock(&fs_info->chunk_mutex);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 23e9285d88de..c0e3015b1bac 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -54,6 +54,11 @@ struct btrfs_device {
spinlock_t io_lock ____cacheline_aligned;
int running_pending;
+ /* When true means this device has pending chunk alloc in
+ * current transaction. Protected by chunk_mutex.
+ */
+ bool has_pending_chunks;
+
/* regular prio bios */
struct btrfs_pending_bios pending_bios;
/* sync bios */
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 18814f1d67d9..3c0bad577859 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -457,7 +457,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_FILE_INVALID, -EIO, "STATUS_FILE_INVALID"},
{STATUS_ALLOTTED_SPACE_EXCEEDED, -EIO,
"STATUS_ALLOTTED_SPACE_EXCEEDED"},
- {STATUS_INSUFFICIENT_RESOURCES, -EREMOTEIO,
+ {STATUS_INSUFFICIENT_RESOURCES, -EAGAIN,
"STATUS_INSUFFICIENT_RESOURCES"},
{STATUS_DFS_EXIT_PATH_FOUND, -EIO, "STATUS_DFS_EXIT_PATH_FOUND"},
{STATUS_DEVICE_DATA_ERROR, -EIO, "STATUS_DEVICE_DATA_ERROR"},
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 39843fa7e11b..809c1edffbaf 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -58,15 +58,13 @@ static void configfs_d_iput(struct dentry * dentry,
if (sd) {
/* Coordinate with configfs_readdir */
spin_lock(&configfs_dirent_lock);
- /* Coordinate with configfs_attach_attr where will increase
- * sd->s_count and update sd->s_dentry to new allocated one.
- * Only set sd->dentry to null when this dentry is the only
- * sd owner.
- * If not do so, configfs_d_iput may run just after
- * configfs_attach_attr and set sd->s_dentry to null
- * even it's still in use.
+ /*
+ * Set sd->s_dentry to null only when this dentry is the one
+ * that is going to be killed. Otherwise configfs_d_iput may
+ * run just after configfs_attach_attr and set sd->s_dentry to
+ * NULL even it's still in use.
*/
- if (atomic_read(&sd->s_count) <= 2)
+ if (sd->s_dentry == dentry)
sd->s_dentry = NULL;
spin_unlock(&configfs_dirent_lock);
@@ -1755,12 +1753,19 @@ int configfs_register_group(struct config_group *parent_group,
inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
ret = create_default_group(parent_group, group);
- if (!ret) {
- spin_lock(&configfs_dirent_lock);
- configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
- spin_unlock(&configfs_dirent_lock);
- }
+ if (ret)
+ goto err_out;
+
+ spin_lock(&configfs_dirent_lock);
+ configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
+ spin_unlock(&configfs_dirent_lock);
+ inode_unlock(d_inode(parent));
+ return 0;
+err_out:
inode_unlock(d_inode(parent));
+ mutex_lock(&subsys->su_mutex);
+ unlink_group(group);
+ mutex_unlock(&subsys->su_mutex);
return ret;
}
EXPORT_SYMBOL(configfs_register_group);
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index c6d431a5cce9..4288839501e9 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -81,6 +81,8 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
if (ret == -ENODATA) {
if (!S_ISDIR(inode->i_mode))
ret = -ENOTDIR;
+ else if (IS_DEADDIR(inode))
+ ret = -ENOENT;
else if (!inode->i_sb->s_cop->empty_dir(inode))
ret = -ENOTEMPTY;
else
diff --git a/fs/dax.c b/fs/dax.c
index 004c8ac1117c..75a289c31c7e 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -908,7 +908,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
goto unlock_pmd;
flush_cache_page(vma, address, pfn);
- pmd = pmdp_huge_clear_flush(vma, address, pmdp);
+ pmd = pmdp_invalidate(vma, address, pmdp);
pmd = pmd_wrprotect(pmd);
pmd = pmd_mkclean(pmd);
set_pmd_at(vma->vm_mm, address, pmdp, pmd);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index ebe649d9793c..bbe155465ca0 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -94,8 +94,10 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->free_secs = free_sections(sbi);
si->prefree_count = prefree_segments(sbi);
si->dirty_count = dirty_segments(sbi);
- si->node_pages = NODE_MAPPING(sbi)->nrpages;
- si->meta_pages = META_MAPPING(sbi)->nrpages;
+ if (sbi->node_inode)
+ si->node_pages = NODE_MAPPING(sbi)->nrpages;
+ if (sbi->meta_inode)
+ si->meta_pages = META_MAPPING(sbi)->nrpages;
si->nats = NM_I(sbi)->nat_cnt;
si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
si->sits = MAIN_SEGS(sbi);
@@ -168,7 +170,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
static void update_mem_info(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
- unsigned npages;
int i;
if (si->base_mem)
@@ -251,10 +252,14 @@ get_cache:
sizeof(struct extent_node);
si->page_mem = 0;
- npages = NODE_MAPPING(sbi)->nrpages;
- si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
- npages = META_MAPPING(sbi)->nrpages;
- si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
+ if (sbi->node_inode) {
+ unsigned npages = NODE_MAPPING(sbi)->nrpages;
+ si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
+ }
+ if (sbi->meta_inode) {
+ unsigned npages = META_MAPPING(sbi)->nrpages;
+ si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
+ }
}
static int stat_show(struct seq_file *s, void *v)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index a4b6eacf22ea..44ea7ac69ef4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1744,6 +1744,7 @@ enospc:
return -ENOSPC;
}
+void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
struct inode *inode,
block_t count)
@@ -1752,13 +1753,21 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
spin_lock(&sbi->stat_lock);
f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
- f2fs_bug_on(sbi, inode->i_blocks < sectors);
sbi->total_valid_block_count -= (block_t)count;
if (sbi->reserved_blocks &&
sbi->current_reserved_blocks < sbi->reserved_blocks)
sbi->current_reserved_blocks = min(sbi->reserved_blocks,
sbi->current_reserved_blocks + count);
spin_unlock(&sbi->stat_lock);
+ if (unlikely(inode->i_blocks < sectors)) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu",
+ inode->i_ino,
+ (unsigned long long)inode->i_blocks,
+ (unsigned long long)sectors);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ return;
+ }
f2fs_i_blocks_write(inode, count, false, true);
}
@@ -2488,7 +2497,9 @@ static inline void *inline_xattr_addr(struct inode *inode, struct page *page)
static inline int inline_xattr_size(struct inode *inode)
{
- return get_inline_xattr_addrs(inode) * sizeof(__le32);
+ if (f2fs_has_inline_xattr(inode))
+ return get_inline_xattr_addrs(inode) * sizeof(__le32);
+ return 0;
}
static inline int f2fs_has_inline_data(struct inode *inode)
@@ -2727,7 +2738,6 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
-void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index dd608b819a3c..0f31df01e36c 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -179,8 +179,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page)
if (provided != calculated)
f2fs_msg(sbi->sb, KERN_WARNING,
- "checksum invalid, ino = %x, %x vs. %x",
- ino_of_node(page), provided, calculated);
+ "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x",
+ page->index, ino_of_node(page), provided, calculated);
return provided == calculated;
}
@@ -476,6 +476,7 @@ make_now:
return inode;
bad_inode:
+ f2fs_inode_synced(inode);
iget_failed(inode);
trace_f2fs_iget_exit(inode, ret);
return ERR_PTR(ret);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 19a0d83aae65..e2d9edad758c 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1180,8 +1180,14 @@ int f2fs_remove_inode_page(struct inode *inode)
f2fs_put_dnode(&dn);
return -EIO;
}
- f2fs_bug_on(F2FS_I_SB(inode),
- inode->i_blocks != 0 && inode->i_blocks != 8);
+
+ if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) {
+ f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
+ "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
+ inode->i_ino,
+ (unsigned long long)inode->i_blocks);
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+ }
/* will put inode & node pages */
err = truncate_node(&dn);
@@ -1276,9 +1282,10 @@ static int read_node_page(struct page *page, int op_flags)
int err;
if (PageUptodate(page)) {
-#ifdef CONFIG_F2FS_CHECK_FS
- f2fs_bug_on(sbi, !f2fs_inode_chksum_verify(sbi, page));
-#endif
+ if (!f2fs_inode_chksum_verify(sbi, page)) {
+ ClearPageUptodate(page);
+ return -EBADMSG;
+ }
return LOCKED_PAGE;
}
@@ -2073,6 +2080,9 @@ static bool add_free_nid(struct f2fs_sb_info *sbi,
if (unlikely(nid == 0))
return false;
+ if (unlikely(f2fs_check_nid_range(sbi, nid)))
+ return false;
+
i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
i->nid = nid;
i->state = FREE_NID;
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index ae0e5f2e67b4..bf5c5f4fa77e 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -485,7 +485,15 @@ retry_dn:
goto err;
f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
- f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
+
+ if (ofs_of_node(dn.node_page) != ofs_of_node(page)) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u",
+ inode->i_ino, ofs_of_node(dn.node_page),
+ ofs_of_node(page));
+ err = -EFAULT;
+ goto err;
+ }
for (; start < end; start++, dn.ofs_in_node++) {
block_t src, dest;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 03fa2c4d3d79..8fc3edb6760c 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3069,13 +3069,18 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
{
int err;
struct f2fs_sb_info *sbi = fio->sbi;
+ unsigned int segno;
fio->new_blkaddr = fio->old_blkaddr;
/* i/o temperature is needed for passing down write hints */
__get_segment_type(fio);
- f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi,
- GET_SEGNO(sbi, fio->new_blkaddr))->type));
+ segno = GET_SEGNO(sbi, fio->new_blkaddr);
+
+ if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ return -EFAULT;
+ }
stat_inc_inplace_blocks(fio->sbi);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index b3d9e317ff0c..5079532cb176 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -660,7 +660,6 @@ static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
static inline int check_block_count(struct f2fs_sb_info *sbi,
int segno, struct f2fs_sit_entry *raw_sit)
{
-#ifdef CONFIG_F2FS_CHECK_FS
bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
int valid_blocks = 0;
int cur_pos = 0, next_pos;
@@ -687,7 +686,7 @@ static inline int check_block_count(struct f2fs_sb_info *sbi,
set_sbi_flag(sbi, SBI_NEED_FSCK);
return -EINVAL;
}
-#endif
+
/* check segment usage, and check boundary of a given segment number */
if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
|| segno > TOTAL_SEGS(sbi) - 1)) {
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 2264f27fd26d..1871031e2d5e 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1050,7 +1050,10 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_bug_on(sbi, sbi->fsync_node_num);
iput(sbi->node_inode);
+ sbi->node_inode = NULL;
+
iput(sbi->meta_inode);
+ sbi->meta_inode = NULL;
/*
* iput() can update stat information, if f2fs_write_checkpoint()
@@ -3166,6 +3169,7 @@ free_node_inode:
f2fs_release_ino_entry(sbi, true);
truncate_inode_pages_final(NODE_MAPPING(sbi));
iput(sbi->node_inode);
+ sbi->node_inode = NULL;
free_stats:
f2fs_destroy_stats(sbi);
free_nm:
@@ -3178,6 +3182,7 @@ free_devices:
free_meta_inode:
make_bad_inode(sbi->meta_inode);
iput(sbi->meta_inode);
+ sbi->meta_inode = NULL;
free_io_dummy:
mempool_destroy(sbi->write_io_dummy);
free_percpu:
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 409a637f7a92..88e30f7cf9e1 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -205,12 +205,17 @@ static inline const struct xattr_handler *f2fs_xattr_handler(int index)
return handler;
}
-static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
- size_t len, const char *name)
+static struct f2fs_xattr_entry *__find_xattr(void *base_addr,
+ void *last_base_addr, int index,
+ size_t len, const char *name)
{
struct f2fs_xattr_entry *entry;
list_for_each_xattr(entry, base_addr) {
+ if ((void *)(entry) + sizeof(__u32) > last_base_addr ||
+ (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr)
+ return NULL;
+
if (entry->e_name_index != index)
continue;
if (entry->e_name_len != len)
@@ -300,20 +305,22 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
const char *name, struct f2fs_xattr_entry **xe,
void **base_addr, int *base_size)
{
- void *cur_addr, *txattr_addr, *last_addr = NULL;
+ void *cur_addr, *txattr_addr, *last_txattr_addr;
+ void *last_addr = NULL;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
- unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0;
unsigned int inline_size = inline_xattr_size(inode);
int err = 0;
- if (!size && !inline_size)
+ if (!xnid && !inline_size)
return -ENODATA;
- *base_size = inline_size + size + XATTR_PADDING_SIZE;
+ *base_size = XATTR_SIZE(xnid, inode) + XATTR_PADDING_SIZE;
txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS);
if (!txattr_addr)
return -ENOMEM;
+ last_txattr_addr = (void *)txattr_addr + XATTR_SIZE(xnid, inode);
+
/* read from inline xattr */
if (inline_size) {
err = read_inline_xattr(inode, ipage, txattr_addr);
@@ -340,7 +347,11 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
else
cur_addr = txattr_addr;
- *xe = __find_xattr(cur_addr, index, len, name);
+ *xe = __find_xattr(cur_addr, last_txattr_addr, index, len, name);
+ if (!*xe) {
+ err = -EFAULT;
+ goto out;
+ }
check:
if (IS_XATTR_LAST_ENTRY(*xe)) {
err = -ENODATA;
@@ -584,7 +595,8 @@ static int __f2fs_setxattr(struct inode *inode, int index,
struct page *ipage, int flags)
{
struct f2fs_xattr_entry *here, *last;
- void *base_addr;
+ void *base_addr, *last_base_addr;
+ nid_t xnid = F2FS_I(inode)->i_xattr_nid;
int found, newsize;
size_t len;
__u32 new_hsize;
@@ -608,8 +620,14 @@ static int __f2fs_setxattr(struct inode *inode, int index,
if (error)
return error;
+ last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode);
+
/* find entry with wanted name. */
- here = __find_xattr(base_addr, index, len, name);
+ here = __find_xattr(base_addr, last_base_addr, index, len, name);
+ if (!here) {
+ error = -EFAULT;
+ goto exit;
+ }
found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index dbcd1d16e669..2a4ecaf338ea 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -74,6 +74,8 @@ struct f2fs_xattr_entry {
entry = XATTR_NEXT_ENTRY(entry))
#define VALID_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer))
#define XATTR_PADDING_SIZE (sizeof(__u32))
+#define XATTR_SIZE(x,i) (((x) ? VALID_XATTR_BLOCK_SIZE : 0) + \
+ (inline_xattr_size(i)))
#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \
VALID_XATTR_BLOCK_SIZE)
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 4f3d72fb1e60..f86ea08bd6ce 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -193,12 +193,17 @@ static int fat_file_release(struct inode *inode, struct file *filp)
int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
struct inode *inode = filp->f_mapping->host;
- int res, err;
+ int err;
+
+ err = __generic_file_fsync(filp, start, end, datasync);
+ if (err)
+ return err;
- res = generic_file_fsync(filp, start, end, datasync);
err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping);
+ if (err)
+ return err;
- return res ? res : err;
+ return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 249de20f752a..6ee471b72a34 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1681,7 +1681,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
offset = outarg->offset & ~PAGE_MASK;
file_size = i_size_read(inode);
- num = outarg->size;
+ num = min(outarg->size, fc->max_write);
if (outarg->offset > file_size)
num = 0;
else if (outarg->offset + num > file_size)
diff --git a/fs/inode.c b/fs/inode.c
index 97f11df6ca6a..c3e17dcbb558 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1817,8 +1817,13 @@ int file_remove_privs(struct file *file)
int kill;
int error = 0;
- /* Fast path for nothing security related */
- if (IS_NOSEC(inode))
+ /*
+ * Fast path for nothing security related.
+ * As well for non-regular files, e.g. blkdev inodes.
+ * For example, blkdev_write_iter() might get here
+ * trying to remove privs which it is not allowed to.
+ */
+ if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
return 0;
kill = dentry_needs_remove_privs(dentry);
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index a8df2f496898..364028c710a8 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -18,7 +18,7 @@
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
-static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS;
+static unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO;
static unsigned int dataserver_retrans;
static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 42850fb5944b..78c3f4359e76 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1243,10 +1243,20 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
atomic_inc(&sp->so_count);
p->o_arg.open_flags = flags;
p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
- p->o_arg.umask = current_umask();
p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim);
p->o_arg.share_access = nfs4_map_atomic_open_share(server,
fmode, flags);
+ if (flags & O_CREAT) {
+ p->o_arg.umask = current_umask();
+ p->o_arg.label = nfs4_label_copy(p->a_label, label);
+ if (c->sattr != NULL && c->sattr->ia_valid != 0) {
+ p->o_arg.u.attrs = &p->attrs;
+ memcpy(&p->attrs, c->sattr, sizeof(p->attrs));
+
+ memcpy(p->o_arg.u.verifier.data, c->verf,
+ sizeof(p->o_arg.u.verifier.data));
+ }
+ }
/* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS
* will return permission denied for all bits until close */
if (!(flags & O_EXCL)) {
@@ -1270,7 +1280,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
p->o_arg.server = server;
p->o_arg.bitmask = nfs4_bitmask(server, label);
p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0];
- p->o_arg.label = nfs4_label_copy(p->a_label, label);
switch (p->o_arg.claim) {
case NFS4_OPEN_CLAIM_NULL:
case NFS4_OPEN_CLAIM_DELEGATE_CUR:
@@ -1283,13 +1292,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
p->o_arg.fh = NFS_FH(d_inode(dentry));
}
- if (c != NULL && c->sattr != NULL && c->sattr->ia_valid != 0) {
- p->o_arg.u.attrs = &p->attrs;
- memcpy(&p->attrs, c->sattr, sizeof(p->attrs));
-
- memcpy(p->o_arg.u.verifier.data, c->verf,
- sizeof(p->o_arg.u.verifier.data));
- }
p->c_arg.fh = &p->o_res.fh;
p->c_arg.stateid = &p->o_res.stateid;
p->c_arg.seqid = p->o_arg.seqid;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bec75600e692..5f62007140cf 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1523,7 +1523,7 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
* Never use more than a third of the remaining memory,
* unless it's the only way to give this client a slot:
*/
- avail = clamp_t(int, avail, slotsize, total_avail/3);
+ avail = clamp_t(unsigned long, avail, slotsize, total_avail/3);
num = min_t(int, num, avail / slotsize);
nfsd_drc_mem_used += num * slotsize;
spin_unlock(&nfsd_drc_lock);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 418fa9c78186..db0beefe65ec 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2413,8 +2413,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
__be32 status;
int err;
struct nfs4_acl *acl = NULL;
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
void *context = NULL;
int contextlen;
+#endif
bool contextsupport = false;
struct nfsd4_compoundres *resp = rqstp->rq_resp;
u32 minorversion = resp->cstate.minorversion;
@@ -2899,12 +2901,14 @@ out_acl:
*p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA);
}
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
status = nfsd4_encode_security_label(xdr, rqstp, context,
contextlen);
if (status)
goto out;
}
+#endif
attrlen = htonl(xdr->buf->len - attrlen_offset - 4);
write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index a7e107309f76..db351247892d 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -120,8 +120,11 @@ void nfsd_put_raparams(struct file *file, struct raparms *ra);
static inline int fh_want_write(struct svc_fh *fh)
{
- int ret = mnt_want_write(fh->fh_export->ex_path.mnt);
+ int ret;
+ if (fh->fh_want_write)
+ return 0;
+ ret = mnt_want_write(fh->fh_export->ex_path.mnt);
if (!ret)
fh->fh_want_write = true;
return ret;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 290373024d9d..e8ace3b54e9c 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -310,6 +310,18 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
out_attach:
spin_lock(&dentry_attach_lock);
+ if (unlikely(dentry->d_fsdata && !alias)) {
+ /* d_fsdata is set by a racing thread which is doing
+ * the same thing as this thread is doing. Leave the racing
+ * thread going ahead and we return here.
+ */
+ spin_unlock(&dentry_attach_lock);
+ iput(dl->dl_inode);
+ ocfs2_lock_res_free(&dl->dl_lockres);
+ kfree(dl);
+ return 0;
+ }
+
dentry->d_fsdata = dl;
dl->dl_count++;
spin_unlock(&dentry_attach_lock);
diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index f65f2b2f594d..1906cc962c4d 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -193,6 +193,7 @@ int ocfs2_filecheck_create_sysfs(struct ocfs2_super *osb)
ret = kobject_init_and_add(&entry->fs_kobj, &ocfs2_ktype_filecheck,
NULL, "filecheck");
if (ret) {
+ kobject_put(&entry->fs_kobj);
kfree(fcheck);
return ret;
}
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 0c810f20f778..0bd276e4ccbe 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -11,6 +11,7 @@
#include <linux/mount.h>
#include <linux/xattr.h>
#include <linux/uio.h>
+#include <linux/uaccess.h>
#include "overlayfs.h"
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
@@ -29,10 +30,11 @@ static struct file *ovl_open_realfile(const struct file *file,
struct inode *inode = file_inode(file);
struct file *realfile;
const struct cred *old_cred;
+ int flags = file->f_flags | O_NOATIME | FMODE_NONOTIFY;
old_cred = ovl_override_creds(inode->i_sb);
- realfile = open_with_fake_path(&file->f_path, file->f_flags | O_NOATIME,
- realinode, current_cred());
+ realfile = open_with_fake_path(&file->f_path, flags, realinode,
+ current_cred());
revert_creds(old_cred);
pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
@@ -50,7 +52,7 @@ static int ovl_change_flags(struct file *file, unsigned int flags)
int err;
/* No atime modificaton on underlying */
- flags |= O_NOATIME;
+ flags |= O_NOATIME | FMODE_NONOTIFY;
/* If some flag changed that cannot be changed then something's amiss */
if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
@@ -144,11 +146,47 @@ static int ovl_release(struct inode *inode, struct file *file)
static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
{
- struct inode *realinode = ovl_inode_real(file_inode(file));
+ struct inode *inode = file_inode(file);
+ struct fd real;
+ const struct cred *old_cred;
+ ssize_t ret;
+
+ /*
+ * The two special cases below do not need to involve real fs,
+ * so we can optimizing concurrent callers.
+ */
+ if (offset == 0) {
+ if (whence == SEEK_CUR)
+ return file->f_pos;
+
+ if (whence == SEEK_SET)
+ return vfs_setpos(file, 0, 0);
+ }
+
+ ret = ovl_real_fdget(file, &real);
+ if (ret)
+ return ret;
+
+ /*
+ * Overlay file f_pos is the master copy that is preserved
+ * through copy up and modified on read/write, but only real
+ * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
+ * limitations that are more strict than ->s_maxbytes for specific
+ * files, so we use the real file to perform seeks.
+ */
+ inode_lock(inode);
+ real.file->f_pos = file->f_pos;
- return generic_file_llseek_size(file, offset, whence,
- realinode->i_sb->s_maxbytes,
- i_size_read(realinode));
+ old_cred = ovl_override_creds(inode->i_sb);
+ ret = vfs_llseek(real.file, offset, whence);
+ revert_creds(old_cred);
+
+ file->f_pos = real.file->f_pos;
+ inode_unlock(inode);
+
+ fdput(real);
+
+ return ret;
}
static void ovl_file_accessed(struct file *file)
@@ -371,34 +409,118 @@ static long ovl_real_ioctl(struct file *file, unsigned int cmd,
return ret;
}
-static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
+ unsigned long arg, unsigned int iflags)
{
long ret;
struct inode *inode = file_inode(file);
+ unsigned int old_iflags;
+
+ if (!inode_owner_or_capable(inode))
+ return -EACCES;
+
+ ret = mnt_want_write_file(file);
+ if (ret)
+ return ret;
+
+ inode_lock(inode);
+
+ /* Check the capability before cred override */
+ ret = -EPERM;
+ old_iflags = READ_ONCE(inode->i_flags);
+ if (((iflags ^ old_iflags) & (S_APPEND | S_IMMUTABLE)) &&
+ !capable(CAP_LINUX_IMMUTABLE))
+ goto unlock;
+
+ ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY);
+ if (ret)
+ goto unlock;
+
+ ret = ovl_real_ioctl(file, cmd, arg);
+
+ ovl_copyflags(ovl_inode_real(inode), inode);
+unlock:
+ inode_unlock(inode);
+
+ mnt_drop_write_file(file);
+
+ return ret;
+
+}
+
+static unsigned int ovl_fsflags_to_iflags(unsigned int flags)
+{
+ unsigned int iflags = 0;
+
+ if (flags & FS_SYNC_FL)
+ iflags |= S_SYNC;
+ if (flags & FS_APPEND_FL)
+ iflags |= S_APPEND;
+ if (flags & FS_IMMUTABLE_FL)
+ iflags |= S_IMMUTABLE;
+ if (flags & FS_NOATIME_FL)
+ iflags |= S_NOATIME;
+
+ return iflags;
+}
+
+static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ unsigned int flags;
+
+ if (get_user(flags, (int __user *) arg))
+ return -EFAULT;
+
+ return ovl_ioctl_set_flags(file, cmd, arg,
+ ovl_fsflags_to_iflags(flags));
+}
+
+static unsigned int ovl_fsxflags_to_iflags(unsigned int xflags)
+{
+ unsigned int iflags = 0;
+
+ if (xflags & FS_XFLAG_SYNC)
+ iflags |= S_SYNC;
+ if (xflags & FS_XFLAG_APPEND)
+ iflags |= S_APPEND;
+ if (xflags & FS_XFLAG_IMMUTABLE)
+ iflags |= S_IMMUTABLE;
+ if (xflags & FS_XFLAG_NOATIME)
+ iflags |= S_NOATIME;
+
+ return iflags;
+}
+
+static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct fsxattr fa;
+
+ memset(&fa, 0, sizeof(fa));
+ if (copy_from_user(&fa, (void __user *) arg, sizeof(fa)))
+ return -EFAULT;
+
+ return ovl_ioctl_set_flags(file, cmd, arg,
+ ovl_fsxflags_to_iflags(fa.fsx_xflags));
+}
+
+static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ long ret;
switch (cmd) {
case FS_IOC_GETFLAGS:
+ case FS_IOC_FSGETXATTR:
ret = ovl_real_ioctl(file, cmd, arg);
break;
case FS_IOC_SETFLAGS:
- if (!inode_owner_or_capable(inode))
- return -EACCES;
-
- ret = mnt_want_write_file(file);
- if (ret)
- return ret;
-
- ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY);
- if (!ret) {
- ret = ovl_real_ioctl(file, cmd, arg);
-
- inode_lock(inode);
- ovl_copyflags(ovl_inode_real(inode), inode);
- inode_unlock(inode);
- }
+ ret = ovl_ioctl_set_fsflags(file, cmd, arg);
+ break;
- mnt_drop_write_file(file);
+ case FS_IOC_FSSETXATTR:
+ ret = ovl_ioctl_set_fsxflags(file, cmd, arg);
break;
default:
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index b48273e846ad..f0389849fd80 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -553,15 +553,15 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
int xinobits = ovl_xino_bits(inode->i_sb);
/*
- * When NFS export is enabled and d_ino is consistent with st_ino
- * (samefs or i_ino has enough bits to encode layer), set the same
- * value used for d_ino to i_ino, because nfsd readdirplus compares
- * d_ino values to i_ino values of child entries. When called from
+ * When d_ino is consistent with st_ino (samefs or i_ino has enough
+ * bits to encode layer), set the same value used for st_ino to i_ino,
+ * so inode number exposed via /proc/locks and a like will be
+ * consistent with d_ino and st_ino values. An i_ino value inconsistent
+ * with d_ino also causes nfsd readdirplus to fail. When called from
* ovl_new_inode(), ino arg is 0, so i_ino will be updated to real
* upper inode i_ino on ovl_inode_init() or ovl_inode_update().
*/
- if (inode->i_sb->s_export_op &&
- (ovl_same_sb(inode->i_sb) || xinobits)) {
+ if (ovl_same_sb(inode->i_sb) || xinobits) {
inode->i_ino = ino;
if (xinobits && fsid && !(ino >> (64 - xinobits)))
inode->i_ino |= (unsigned long)fsid << (64 - xinobits);
@@ -777,6 +777,54 @@ struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
return inode;
}
+bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir)
+{
+ struct inode *key = d_inode(dir);
+ struct inode *trap;
+ bool res;
+
+ trap = ilookup5(sb, (unsigned long) key, ovl_inode_test, key);
+ if (!trap)
+ return false;
+
+ res = IS_DEADDIR(trap) && !ovl_inode_upper(trap) &&
+ !ovl_inode_lower(trap);
+
+ iput(trap);
+ return res;
+}
+
+/*
+ * Create an inode cache entry for layer root dir, that will intentionally
+ * fail ovl_verify_inode(), so any lookup that will find some layer root
+ * will fail.
+ */
+struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir)
+{
+ struct inode *key = d_inode(dir);
+ struct inode *trap;
+
+ if (!d_is_dir(dir))
+ return ERR_PTR(-ENOTDIR);
+
+ trap = iget5_locked(sb, (unsigned long) key, ovl_inode_test,
+ ovl_inode_set, key);
+ if (!trap)
+ return ERR_PTR(-ENOMEM);
+
+ if (!(trap->i_state & I_NEW)) {
+ /* Conflicting layer roots? */
+ iput(trap);
+ return ERR_PTR(-ELOOP);
+ }
+
+ trap->i_mode = S_IFDIR;
+ trap->i_flags = S_DEAD;
+ unlock_new_inode(trap);
+
+ return trap;
+}
+
/*
* Does overlay inode need to be hashed by lower inode?
*/
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index efd372312ef1..badf039267a2 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -18,6 +18,7 @@
#include "overlayfs.h"
struct ovl_lookup_data {
+ struct super_block *sb;
struct qstr name;
bool is_dir;
bool opaque;
@@ -244,6 +245,12 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
if (!d->metacopy || d->last)
goto out;
} else {
+ if (ovl_lookup_trap_inode(d->sb, this)) {
+ /* Caught in a trap of overlapping layers */
+ err = -ELOOP;
+ goto out_err;
+ }
+
if (last_element)
d->is_dir = true;
if (d->last)
@@ -819,6 +826,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
int err;
bool metacopy = false;
struct ovl_lookup_data d = {
+ .sb = dentry->d_sb,
.name = dentry->d_name,
.is_dir = false,
.opaque = false,
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 80fb66426760..265bf9cfde08 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -270,6 +270,7 @@ void ovl_clear_flag(unsigned long flag, struct inode *inode);
bool ovl_test_flag(unsigned long flag, struct inode *inode);
bool ovl_inuse_trylock(struct dentry *dentry);
void ovl_inuse_unlock(struct dentry *dentry);
+bool ovl_is_inuse(struct dentry *dentry);
bool ovl_need_index(struct dentry *dentry);
int ovl_nlink_start(struct dentry *dentry, bool *locked);
void ovl_nlink_end(struct dentry *dentry, bool locked);
@@ -366,6 +367,8 @@ struct ovl_inode_params {
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
bool is_upper);
+bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir);
+struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir);
struct inode *ovl_get_inode(struct super_block *sb,
struct ovl_inode_params *oip);
static inline void ovl_copyattr(struct inode *from, struct inode *to)
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index ec237035333a..6ed1ace8f8b3 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -29,6 +29,8 @@ struct ovl_sb {
struct ovl_layer {
struct vfsmount *mnt;
+ /* Trap in ovl inode cache */
+ struct inode *trap;
struct ovl_sb *fs;
/* Index of this layer in fs root (upper idx == 0) */
int idx;
@@ -65,6 +67,10 @@ struct ovl_fs {
/* Did we take the inuse lock? */
bool upperdir_locked;
bool workdir_locked;
+ /* Traps in ovl inode cache */
+ struct inode *upperdir_trap;
+ struct inode *workdir_trap;
+ struct inode *indexdir_trap;
/* Inode numbers in all layers do not use the high xino_bits */
unsigned int xino_bits;
};
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 0fb0a59a5e5c..2d028c02621f 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -217,6 +217,9 @@ static void ovl_free_fs(struct ovl_fs *ofs)
{
unsigned i;
+ iput(ofs->indexdir_trap);
+ iput(ofs->workdir_trap);
+ iput(ofs->upperdir_trap);
dput(ofs->indexdir);
dput(ofs->workdir);
if (ofs->workdir_locked)
@@ -225,8 +228,10 @@ static void ovl_free_fs(struct ovl_fs *ofs)
if (ofs->upperdir_locked)
ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
mntput(ofs->upper_mnt);
- for (i = 0; i < ofs->numlower; i++)
+ for (i = 0; i < ofs->numlower; i++) {
+ iput(ofs->lower_layers[i].trap);
mntput(ofs->lower_layers[i].mnt);
+ }
for (i = 0; i < ofs->numlowerfs; i++)
free_anon_bdev(ofs->lower_fs[i].pseudo_dev);
kfree(ofs->lower_layers);
@@ -984,7 +989,26 @@ static const struct xattr_handler *ovl_xattr_handlers[] = {
NULL
};
-static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath)
+static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
+ struct inode **ptrap, const char *name)
+{
+ struct inode *trap;
+ int err;
+
+ trap = ovl_get_trap_inode(sb, dir);
+ err = PTR_ERR_OR_ZERO(trap);
+ if (err) {
+ if (err == -ELOOP)
+ pr_err("overlayfs: conflicting %s path\n", name);
+ return err;
+ }
+
+ *ptrap = trap;
+ return 0;
+}
+
+static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
+ struct path *upperpath)
{
struct vfsmount *upper_mnt;
int err;
@@ -1004,6 +1028,11 @@ static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath)
if (err)
goto out;
+ err = ovl_setup_trap(sb, upperpath->dentry, &ofs->upperdir_trap,
+ "upperdir");
+ if (err)
+ goto out;
+
upper_mnt = clone_private_mount(upperpath);
err = PTR_ERR(upper_mnt);
if (IS_ERR(upper_mnt)) {
@@ -1030,7 +1059,8 @@ out:
return err;
}
-static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
+static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
+ struct path *workpath)
{
struct vfsmount *mnt = ofs->upper_mnt;
struct dentry *temp;
@@ -1045,6 +1075,10 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
if (!ofs->workdir)
goto out;
+ err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
+ if (err)
+ goto out;
+
/*
* Upper should support d_type, else whiteouts are visible. Given
* workdir and upper are on same fs, we can do iterate_dir() on
@@ -1105,7 +1139,8 @@ out:
return err;
}
-static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
+static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
+ struct path *upperpath)
{
int err;
struct path workpath = { };
@@ -1136,19 +1171,16 @@ static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
}
- err = ovl_make_workdir(ofs, &workpath);
- if (err)
- goto out;
+ err = ovl_make_workdir(sb, ofs, &workpath);
- err = 0;
out:
path_put(&workpath);
return err;
}
-static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
- struct path *upperpath)
+static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
+ struct ovl_entry *oe, struct path *upperpath)
{
struct vfsmount *mnt = ofs->upper_mnt;
int err;
@@ -1167,6 +1199,11 @@ static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
if (ofs->indexdir) {
+ err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
+ "indexdir");
+ if (err)
+ goto out;
+
/*
* Verify upper root is exclusively associated with index dir.
* Older kernels stored upper fh in "trusted.overlay.origin"
@@ -1226,8 +1263,8 @@ static int ovl_get_fsid(struct ovl_fs *ofs, struct super_block *sb)
return ofs->numlowerfs;
}
-static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
- unsigned int numlower)
+static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs,
+ struct path *stack, unsigned int numlower)
{
int err;
unsigned int i;
@@ -1245,16 +1282,28 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
for (i = 0; i < numlower; i++) {
struct vfsmount *mnt;
+ struct inode *trap;
int fsid;
err = fsid = ovl_get_fsid(ofs, stack[i].mnt->mnt_sb);
if (err < 0)
goto out;
+ err = -EBUSY;
+ if (ovl_is_inuse(stack[i].dentry)) {
+ pr_err("overlayfs: lowerdir is in-use as upperdir/workdir\n");
+ goto out;
+ }
+
+ err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
+ if (err)
+ goto out;
+
mnt = clone_private_mount(&stack[i]);
err = PTR_ERR(mnt);
if (IS_ERR(mnt)) {
pr_err("overlayfs: failed to clone lowerpath\n");
+ iput(trap);
goto out;
}
@@ -1264,6 +1313,7 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
*/
mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
+ ofs->lower_layers[ofs->numlower].trap = trap;
ofs->lower_layers[ofs->numlower].mnt = mnt;
ofs->lower_layers[ofs->numlower].idx = i + 1;
ofs->lower_layers[ofs->numlower].fsid = fsid;
@@ -1358,7 +1408,7 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
goto out_err;
}
- err = ovl_get_lower_layers(ofs, stack, numlower);
+ err = ovl_get_lower_layers(sb, ofs, stack, numlower);
if (err)
goto out_err;
@@ -1390,6 +1440,77 @@ out_err:
goto out;
}
+/*
+ * Check if this layer root is a descendant of:
+ * - another layer of this overlayfs instance
+ * - upper/work dir of any overlayfs instance
+ */
+static int ovl_check_layer(struct super_block *sb, struct dentry *dentry,
+ const char *name)
+{
+ struct dentry *next = dentry, *parent;
+ int err = 0;
+
+ if (!dentry)
+ return 0;
+
+ parent = dget_parent(next);
+
+ /* Walk back ancestors to root (inclusive) looking for traps */
+ while (!err && parent != next) {
+ if (ovl_is_inuse(parent)) {
+ err = -EBUSY;
+ pr_err("overlayfs: %s path overlapping in-use upperdir/workdir\n",
+ name);
+ } else if (ovl_lookup_trap_inode(sb, parent)) {
+ err = -ELOOP;
+ pr_err("overlayfs: overlapping %s path\n", name);
+ }
+ next = parent;
+ parent = dget_parent(next);
+ dput(next);
+ }
+
+ dput(parent);
+
+ return err;
+}
+
+/*
+ * Check if any of the layers or work dirs overlap.
+ */
+static int ovl_check_overlapping_layers(struct super_block *sb,
+ struct ovl_fs *ofs)
+{
+ int i, err;
+
+ if (ofs->upper_mnt) {
+ err = ovl_check_layer(sb, ofs->upper_mnt->mnt_root, "upperdir");
+ if (err)
+ return err;
+
+ /*
+ * Checking workbasedir avoids hitting ovl_is_inuse(parent) of
+ * this instance and covers overlapping work and index dirs,
+ * unless work or index dir have been moved since created inside
+ * workbasedir. In that case, we already have their traps in
+ * inode cache and we will catch that case on lookup.
+ */
+ err = ovl_check_layer(sb, ofs->workbasedir, "workdir");
+ if (err)
+ return err;
+ }
+
+ for (i = 0; i < ofs->numlower; i++) {
+ err = ovl_check_layer(sb, ofs->lower_layers[i].mnt->mnt_root,
+ "lowerdir");
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int ovl_fill_super(struct super_block *sb, void *data, int silent)
{
struct path upperpath = { };
@@ -1429,17 +1550,20 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (ofs->config.xino != OVL_XINO_OFF)
ofs->xino_bits = BITS_PER_LONG - 32;
+ /* alloc/destroy_inode needed for setting up traps in inode cache */
+ sb->s_op = &ovl_super_operations;
+
if (ofs->config.upperdir) {
if (!ofs->config.workdir) {
pr_err("overlayfs: missing 'workdir'\n");
goto out_err;
}
- err = ovl_get_upper(ofs, &upperpath);
+ err = ovl_get_upper(sb, ofs, &upperpath);
if (err)
goto out_err;
- err = ovl_get_workdir(ofs, &upperpath);
+ err = ovl_get_workdir(sb, ofs, &upperpath);
if (err)
goto out_err;
@@ -1460,7 +1584,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
sb->s_flags |= SB_RDONLY;
if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
- err = ovl_get_indexdir(ofs, oe, &upperpath);
+ err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
if (err)
goto out_free_oe;
@@ -1473,6 +1597,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
}
+ err = ovl_check_overlapping_layers(sb, ofs);
+ if (err)
+ goto out_free_oe;
+
/* Show index=off in /proc/mounts for forced r/o mount */
if (!ofs->indexdir) {
ofs->config.index = false;
@@ -1494,7 +1622,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
sb->s_magic = OVERLAYFS_SUPER_MAGIC;
- sb->s_op = &ovl_super_operations;
sb->s_xattr = ovl_xattr_handlers;
sb->s_fs_info = ofs;
sb->s_flags |= SB_POSIXACL;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index c9a2e3c6d537..db8bdb29b320 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -653,6 +653,18 @@ void ovl_inuse_unlock(struct dentry *dentry)
}
}
+bool ovl_is_inuse(struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+ bool inuse;
+
+ spin_lock(&inode->i_lock);
+ inuse = (inode->i_state & I_OVL_INUSE);
+ spin_unlock(&inode->i_lock);
+
+ return inuse;
+}
+
/*
* Does this overlay dentry need to be indexed on copy up?
*/
diff --git a/fs/proc/array.c b/fs/proc/array.c
index ccfef702c771..e4d0cfebaac5 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -452,7 +452,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
* a program is not able to use ptrace(2) in that case. It is
* safe because the task has stopped executing permanently.
*/
- if (permitted && (task->flags & PF_DUMPCORE)) {
+ if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE))) {
if (try_get_task_stack(task)) {
eip = KSTK_EIP(task);
esp = KSTK_ESP(task);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index fc20e06c56ba..dd1783ea7003 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1993,8 +1993,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
&warn_to[cnt]);
if (ret)
goto over_quota;
- ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0,
- &warn_to[cnt]);
+ ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space,
+ DQUOT_SPACE_WARN, &warn_to[cnt]);
if (ret) {
spin_lock(&transfer_to[cnt]->dq_dqb_lock);
dquot_decr_inodes(transfer_to[cnt], inode_usage);
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 82d0f52414a6..f845093466be 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -471,10 +471,11 @@ static int do_timerfd_settime(int ufd, int flags,
break;
}
spin_unlock_irq(&ctx->wqh.lock);
+
if (isalarm(ctx))
- hrtimer_wait_for_timer(&ctx->t.alarm.timer);
+ hrtimer_grab_expiry_lock(&ctx->t.alarm.timer);
else
- hrtimer_wait_for_timer(&ctx->t.tmr);
+ hrtimer_grab_expiry_lock(&ctx->t.tmr);
}
/*
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index ae796e10f68b..4c46ebf0e773 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -470,13 +470,15 @@ static struct buffer_head *udf_getblk(struct inode *inode, udf_pblk_t block,
return NULL;
}
-/* Extend the file by 'blocks' blocks, return the number of extents added */
+/* Extend the file with new blocks totaling 'new_block_bytes',
+ * return the number of extents added
+ */
static int udf_do_extend_file(struct inode *inode,
struct extent_position *last_pos,
struct kernel_long_ad *last_ext,
- sector_t blocks)
+ loff_t new_block_bytes)
{
- sector_t add;
+ uint32_t add;
int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
struct super_block *sb = inode->i_sb;
struct kernel_lb_addr prealloc_loc = {};
@@ -486,7 +488,7 @@ static int udf_do_extend_file(struct inode *inode,
/* The previous extent is fake and we should not extend by anything
* - there's nothing to do... */
- if (!blocks && fake)
+ if (!new_block_bytes && fake)
return 0;
iinfo = UDF_I(inode);
@@ -517,13 +519,12 @@ static int udf_do_extend_file(struct inode *inode,
/* Can we merge with the previous extent? */
if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) ==
EXT_NOT_RECORDED_NOT_ALLOCATED) {
- add = ((1 << 30) - sb->s_blocksize -
- (last_ext->extLength & UDF_EXTENT_LENGTH_MASK)) >>
- sb->s_blocksize_bits;
- if (add > blocks)
- add = blocks;
- blocks -= add;
- last_ext->extLength += add << sb->s_blocksize_bits;
+ add = (1 << 30) - sb->s_blocksize -
+ (last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
+ if (add > new_block_bytes)
+ add = new_block_bytes;
+ new_block_bytes -= add;
+ last_ext->extLength += add;
}
if (fake) {
@@ -544,28 +545,27 @@ static int udf_do_extend_file(struct inode *inode,
}
/* Managed to do everything necessary? */
- if (!blocks)
+ if (!new_block_bytes)
goto out;
/* All further extents will be NOT_RECORDED_NOT_ALLOCATED */
last_ext->extLocation.logicalBlockNum = 0;
last_ext->extLocation.partitionReferenceNum = 0;
- add = (1 << (30-sb->s_blocksize_bits)) - 1;
- last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
- (add << sb->s_blocksize_bits);
+ add = (1 << 30) - sb->s_blocksize;
+ last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | add;
/* Create enough extents to cover the whole hole */
- while (blocks > add) {
- blocks -= add;
+ while (new_block_bytes > add) {
+ new_block_bytes -= add;
err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1);
if (err)
return err;
count++;
}
- if (blocks) {
+ if (new_block_bytes) {
last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
- (blocks << sb->s_blocksize_bits);
+ new_block_bytes;
err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1);
if (err)
@@ -596,6 +596,24 @@ out:
return count;
}
+/* Extend the final block of the file to final_block_len bytes */
+static void udf_do_extend_final_block(struct inode *inode,
+ struct extent_position *last_pos,
+ struct kernel_long_ad *last_ext,
+ uint32_t final_block_len)
+{
+ struct super_block *sb = inode->i_sb;
+ uint32_t added_bytes;
+
+ added_bytes = final_block_len -
+ (last_ext->extLength & (sb->s_blocksize - 1));
+ last_ext->extLength += added_bytes;
+ UDF_I(inode)->i_lenExtents += added_bytes;
+
+ udf_write_aext(inode, last_pos, &last_ext->extLocation,
+ last_ext->extLength, 1);
+}
+
static int udf_extend_file(struct inode *inode, loff_t newsize)
{
@@ -605,10 +623,12 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
int8_t etype;
struct super_block *sb = inode->i_sb;
sector_t first_block = newsize >> sb->s_blocksize_bits, offset;
+ unsigned long partial_final_block;
int adsize;
struct udf_inode_info *iinfo = UDF_I(inode);
struct kernel_long_ad extent;
- int err;
+ int err = 0;
+ int within_final_block;
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
adsize = sizeof(struct short_ad);
@@ -618,18 +638,8 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
BUG();
etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset);
+ within_final_block = (etype != -1);
- /* File has extent covering the new size (could happen when extending
- * inside a block)? */
- if (etype != -1)
- return 0;
- if (newsize & (sb->s_blocksize - 1))
- offset++;
- /* Extended file just to the boundary of the last file block? */
- if (offset == 0)
- return 0;
-
- /* Truncate is extending the file by 'offset' blocks */
if ((!epos.bh && epos.offset == udf_file_entry_alloc_offset(inode)) ||
(epos.bh && epos.offset == sizeof(struct allocExtDesc))) {
/* File has no extents at all or has empty last
@@ -643,7 +653,22 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
&extent.extLength, 0);
extent.extLength |= etype << 30;
}
- err = udf_do_extend_file(inode, &epos, &extent, offset);
+
+ partial_final_block = newsize & (sb->s_blocksize - 1);
+
+ /* File has extent covering the new size (could happen when extending
+ * inside a block)?
+ */
+ if (within_final_block) {
+ /* Extending file within the last file block */
+ udf_do_extend_final_block(inode, &epos, &extent,
+ partial_final_block);
+ } else {
+ loff_t add = ((loff_t)offset << sb->s_blocksize_bits) |
+ partial_final_block;
+ err = udf_do_extend_file(inode, &epos, &extent, add);
+ }
+
if (err < 0)
goto out;
err = 0;
@@ -745,6 +770,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
/* Are we beyond EOF? */
if (etype == -1) {
int ret;
+ loff_t hole_len;
isBeyondEOF = true;
if (count) {
if (c)
@@ -760,7 +786,8 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
startnum = (offset > 0);
}
/* Create extents for the hole between EOF and offset */
- ret = udf_do_extend_file(inode, &prev_epos, laarr, offset);
+ hole_len = (loff_t)offset << inode->i_blkbits;
+ ret = udf_do_extend_file(inode, &prev_epos, laarr, hole_len);
if (ret < 0) {
*err = ret;
newblock = 0;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index aaca81b5e119..e1ebdbe40032 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -40,6 +40,16 @@ enum userfaultfd_state {
/*
* Start with fault_pending_wqh and fault_wqh so they're more likely
* to be in the same cacheline.
+ *
+ * Locking order:
+ * fd_wqh.lock
+ * fault_pending_wqh.lock
+ * fault_wqh.lock
+ * event_wqh.lock
+ *
+ * To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
+ * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
+ * also taken in IRQ context.
*/
struct userfaultfd_ctx {
/* waitqueue head for the pending (i.e. not read) userfaults */
@@ -459,7 +469,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
TASK_KILLABLE;
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
/*
* After the __add_wait_queue the uwq is visible to userland
* through poll/read().
@@ -471,7 +481,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
* __add_wait_queue.
*/
set_current_state(blocking_state);
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
if (!is_vm_hugetlb_page(vmf->vma))
must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
@@ -553,13 +563,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
* kernel stack can be released after the list_del_init.
*/
if (!list_empty_careful(&uwq.wq.entry)) {
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
/*
* No need of list_del_init(), the uwq on the stack
* will be freed shortly anyway.
*/
list_del(&uwq.wq.entry);
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
}
/*
@@ -584,7 +594,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
init_waitqueue_entry(&ewq->wq, current);
release_new_ctx = NULL;
- spin_lock(&ctx->event_wqh.lock);
+ spin_lock_irq(&ctx->event_wqh.lock);
/*
* After the __add_wait_queue the uwq is visible to userland
* through poll/read().
@@ -614,15 +624,15 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
break;
}
- spin_unlock(&ctx->event_wqh.lock);
+ spin_unlock_irq(&ctx->event_wqh.lock);
wake_up_poll(&ctx->fd_wqh, EPOLLIN);
schedule();
- spin_lock(&ctx->event_wqh.lock);
+ spin_lock_irq(&ctx->event_wqh.lock);
}
__set_current_state(TASK_RUNNING);
- spin_unlock(&ctx->event_wqh.lock);
+ spin_unlock_irq(&ctx->event_wqh.lock);
if (release_new_ctx) {
struct vm_area_struct *vma;
@@ -919,10 +929,10 @@ wakeup:
* the last page faults that may have been already waiting on
* the fault_*wqh.
*/
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range);
__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range);
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
/* Flush pending events that may still wait on event_wqh */
wake_up_all(&ctx->event_wqh);
@@ -1135,7 +1145,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
if (!ret && msg->event == UFFD_EVENT_FORK) {
ret = resolve_userfault_fork(ctx, fork_nctx, msg);
- spin_lock(&ctx->event_wqh.lock);
+ spin_lock_irq(&ctx->event_wqh.lock);
if (!list_empty(&fork_event)) {
/*
* The fork thread didn't abort, so we can
@@ -1181,7 +1191,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
if (ret)
userfaultfd_ctx_put(fork_nctx);
}
- spin_unlock(&ctx->event_wqh.lock);
+ spin_unlock_irq(&ctx->event_wqh.lock);
}
return ret;
@@ -1220,14 +1230,14 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
static void __wake_userfault(struct userfaultfd_ctx *ctx,
struct userfaultfd_wake_range *range)
{
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
/* wake all in the range and autoremove */
if (waitqueue_active(&ctx->fault_pending_wqh))
__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL,
range);
if (waitqueue_active(&ctx->fault_wqh))
__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range);
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
}
static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
@@ -1882,7 +1892,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
wait_queue_entry_t *wq;
unsigned long pending = 0, total = 0;
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) {
pending++;
total++;
@@ -1890,7 +1900,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
list_for_each_entry(wq, &ctx->fault_wqh.head, entry) {
total++;
}
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
/*
* If more protocols will be added, there will be all shown