summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/cache.c8
-rw-r--r--fs/9p/v9fs.h2
-rw-r--r--fs/9p/v9fs_vfs.h23
-rw-r--r--fs/9p/vfs_file.c6
-rw-r--r--fs/9p/vfs_inode.c25
-rw-r--r--fs/9p/vfs_inode_dotl.c27
-rw-r--r--fs/9p/vfs_super.c4
-rw-r--r--fs/aio.c2
-rw-r--r--fs/autofs4/expire.c3
-rw-r--r--fs/autofs4/inode.c4
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/ctree.c14
-rw-r--r--fs/btrfs/ctree.h141
-rw-r--r--fs/btrfs/dev-replace.c2
-rw-r--r--fs/btrfs/disk-io.c88
-rw-r--r--fs/btrfs/extent-tree.c112
-rw-r--r--fs/btrfs/extent_io.c47
-rw-r--r--fs/btrfs/extent_io.h19
-rw-r--r--fs/btrfs/extent_map.c2
-rw-r--r--fs/btrfs/extent_map.h10
-rw-r--r--fs/btrfs/scrub.c2
-rw-r--r--fs/btrfs/send.c11
-rw-r--r--fs/btrfs/struct-funcs.c9
-rw-r--r--fs/btrfs/tree-checker.c649
-rw-r--r--fs/btrfs/tree-checker.h38
-rw-r--r--fs/btrfs/volumes.c139
-rw-r--r--fs/btrfs/volumes.h2
-rw-r--r--fs/cachefiles/rdwr.c3
-rw-r--r--fs/ceph/caps.c1
-rw-r--r--fs/ceph/snap.c3
-rw-r--r--fs/cifs/connect.c53
-rw-r--r--fs/cifs/file.c28
-rw-r--r--fs/cifs/readdir.c9
-rw-r--r--fs/cifs/smb2file.c8
-rw-r--r--fs/cifs/smb2maperror.c4
-rw-r--r--fs/cifs/smb2ops.c6
-rw-r--r--fs/cifs/smb2pdu.c4
-rw-r--r--fs/cifs/smb2pdu.h4
-rw-r--r--fs/cifs/transport.c2
-rw-r--r--fs/dcache.c6
-rw-r--r--fs/debugfs/inode.c7
-rw-r--r--fs/dlm/ast.c10
-rw-r--r--fs/dlm/lock.c17
-rw-r--r--fs/dlm/lockspace.c2
-rw-r--r--fs/drop_caches.c8
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/exec.c9
-rw-r--r--fs/exportfs/expfs.c2
-rw-r--r--fs/ext2/super.c39
-rw-r--r--fs/ext4/inline.c11
-rw-r--r--fs/ext4/resize.c5
-rw-r--r--fs/ext4/super.c13
-rw-r--r--fs/f2fs/acl.c14
-rw-r--r--fs/f2fs/checkpoint.c168
-rw-r--r--fs/f2fs/data.c118
-rw-r--r--fs/f2fs/dir.c87
-rw-r--r--fs/f2fs/f2fs.h77
-rw-r--r--fs/f2fs/file.c24
-rw-r--r--fs/f2fs/inline.c115
-rw-r--r--fs/f2fs/inode.c68
-rw-r--r--fs/f2fs/node.c107
-rw-r--r--fs/f2fs/node.h77
-rw-r--r--fs/f2fs/recovery.c165
-rw-r--r--fs/f2fs/segment.c98
-rw-r--r--fs/f2fs/segment.h48
-rw-r--r--fs/f2fs/super.c134
-rw-r--r--fs/fscache/object.c3
-rw-r--r--fs/fuse/dev.c5
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/hfs/btree.c3
-rw-r--r--fs/hfsplus/btree.c3
-rw-r--r--fs/hugetlbfs/inode.c42
-rw-r--r--fs/jbd2/transaction.c33
-rw-r--r--fs/jffs2/super.c3
-rw-r--r--fs/ncpfs/ioctl.c4
-rw-r--r--fs/nfs/direct.c15
-rw-r--r--fs/nfs/filelayout/filelayout.c17
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c25
-rw-r--r--fs/nfs/pagelist.c12
-rw-r--r--fs/nfs/pnfs.c24
-rw-r--r--fs/nfs/read.c2
-rw-r--r--fs/nfs/super.c8
-rw-r--r--fs/nfsd/nfs3proc.c16
-rw-r--r--fs/nfsd/nfs3xdr.c1
-rw-r--r--fs/nfsd/nfsctl.c2
-rw-r--r--fs/ocfs2/buffer_head_io.c2
-rw-r--r--fs/ocfs2/export.c2
-rw-r--r--fs/ocfs2/localalloc.c9
-rw-r--r--fs/ocfs2/move_extents.c47
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c19
-rw-r--r--fs/pstore/platform.c4
-rw-r--r--fs/pstore/ram_core.c5
-rw-r--r--fs/read_write.c4
-rw-r--r--fs/super.c30
-rw-r--r--fs/sysv/inode.c2
-rw-r--r--fs/udf/inode.c6
-rw-r--r--fs/xfs/libxfs/xfs_attr.c9
98 files changed, 2499 insertions, 809 deletions
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index a69260f27555..103ca5e1267b 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -243,14 +243,14 @@ void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp)
if (!v9inode->fscache)
return;
- spin_lock(&v9inode->fscache_lock);
+ mutex_lock(&v9inode->fscache_lock);
if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
v9fs_cache_inode_flush_cookie(inode);
else
v9fs_cache_inode_get_cookie(inode);
- spin_unlock(&v9inode->fscache_lock);
+ mutex_unlock(&v9inode->fscache_lock);
}
void v9fs_cache_inode_reset_cookie(struct inode *inode)
@@ -264,7 +264,7 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode)
old = v9inode->fscache;
- spin_lock(&v9inode->fscache_lock);
+ mutex_lock(&v9inode->fscache_lock);
fscache_relinquish_cookie(v9inode->fscache, 1);
v9ses = v9fs_inode2v9ses(inode);
@@ -274,7 +274,7 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode)
p9_debug(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p\n",
inode, old, v9inode->fscache);
- spin_unlock(&v9inode->fscache_lock);
+ mutex_unlock(&v9inode->fscache_lock);
}
int __v9fs_fscache_release_page(struct page *page, gfp_t gfp)
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 0923f2cf3c80..6877050384a1 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -123,7 +123,7 @@ struct v9fs_session_info {
struct v9fs_inode {
#ifdef CONFIG_9P_FSCACHE
- spinlock_t fscache_lock;
+ struct mutex fscache_lock;
struct fscache_cookie *fscache;
#endif
struct p9_qid qid;
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 5a0db6dec8d1..aaee1e6584e6 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -40,6 +40,9 @@
*/
#define P9_LOCK_TIMEOUT (30*HZ)
+/* flags for v9fs_stat2inode() & v9fs_stat2inode_dotl() */
+#define V9FS_STAT2INODE_KEEP_ISIZE 1
+
extern struct file_system_type v9fs_fs_type;
extern const struct address_space_operations v9fs_addr_operations;
extern const struct file_operations v9fs_file_operations;
@@ -61,8 +64,10 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
struct inode *inode, umode_t mode, dev_t);
void v9fs_evict_inode(struct inode *inode);
ino_t v9fs_qid2ino(struct p9_qid *qid);
-void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
-void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *);
+void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
+ struct super_block *sb, unsigned int flags);
+void v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
+ unsigned int flags);
int v9fs_dir_release(struct inode *inode, struct file *filp);
int v9fs_file_open(struct inode *inode, struct file *file);
void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
@@ -83,4 +88,18 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode)
}
int v9fs_open_to_dotl_flags(int flags);
+
+static inline void v9fs_i_size_write(struct inode *inode, loff_t i_size)
+{
+ /*
+ * 32-bit need the lock, concurrent updates could break the
+ * sequences and make i_size_read() loop forever.
+ * 64-bit updates are atomic and can skip the locking.
+ */
+ if (sizeof(i_size) > sizeof(long))
+ spin_lock(&inode->i_lock);
+ i_size_write(inode, i_size);
+ if (sizeof(i_size) > sizeof(long))
+ spin_unlock(&inode->i_lock);
+}
#endif
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index e7b3d2c4472d..62ce8b4a7e5f 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -442,7 +442,11 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
i_size = i_size_read(inode);
if (iocb->ki_pos > i_size) {
inode_add_bytes(inode, iocb->ki_pos - i_size);
- i_size_write(inode, iocb->ki_pos);
+ /*
+ * Need to serialize against i_size_write() in
+ * v9fs_stat2inode()
+ */
+ v9fs_i_size_write(inode, iocb->ki_pos);
}
return retval;
}
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 73f1d1b3a51c..2de1505aedfd 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -244,7 +244,7 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
return NULL;
#ifdef CONFIG_9P_FSCACHE
v9inode->fscache = NULL;
- spin_lock_init(&v9inode->fscache_lock);
+ mutex_init(&v9inode->fscache_lock);
#endif
v9inode->writeback_fid = NULL;
v9inode->cache_validity = 0;
@@ -538,7 +538,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
if (retval)
goto error;
- v9fs_stat2inode(st, inode, sb);
+ v9fs_stat2inode(st, inode, sb, 0);
v9fs_cache_inode_get_cookie(inode);
unlock_new_inode(inode);
return inode;
@@ -1074,7 +1074,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
if (IS_ERR(st))
return PTR_ERR(st);
- v9fs_stat2inode(st, d_inode(dentry), d_inode(dentry)->i_sb);
+ v9fs_stat2inode(st, d_inode(dentry), d_inode(dentry)->i_sb, 0);
generic_fillattr(d_inode(dentry), stat);
p9stat_free(st);
@@ -1152,12 +1152,13 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
* @stat: Plan 9 metadata (mistat) structure
* @inode: inode to populate
* @sb: superblock of filesystem
+ * @flags: control flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE)
*
*/
void
v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
- struct super_block *sb)
+ struct super_block *sb, unsigned int flags)
{
umode_t mode;
char ext[32];
@@ -1198,10 +1199,11 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
mode = p9mode2perm(v9ses, stat);
mode |= inode->i_mode & ~S_IALLUGO;
inode->i_mode = mode;
- i_size_write(inode, stat->length);
+ if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE))
+ v9fs_i_size_write(inode, stat->length);
/* not real number of blocks, but 512 byte ones ... */
- inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
+ inode->i_blocks = (stat->length + 512 - 1) >> 9;
v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
}
@@ -1389,9 +1391,9 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
{
int umode;
dev_t rdev;
- loff_t i_size;
struct p9_wstat *st;
struct v9fs_session_info *v9ses;
+ unsigned int flags;
v9ses = v9fs_inode2v9ses(inode);
st = p9_client_stat(fid);
@@ -1404,16 +1406,13 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
goto out;
- spin_lock(&inode->i_lock);
/*
* We don't want to refresh inode->i_size,
* because we may have cached data
*/
- i_size = inode->i_size;
- v9fs_stat2inode(st, inode, inode->i_sb);
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
- inode->i_size = i_size;
- spin_unlock(&inode->i_lock);
+ flags = (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) ?
+ V9FS_STAT2INODE_KEEP_ISIZE : 0;
+ v9fs_stat2inode(st, inode, inode->i_sb, flags);
out:
p9stat_free(st);
kfree(st);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 0b88744c6446..7ae67fcca031 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -143,7 +143,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
if (retval)
goto error;
- v9fs_stat2inode_dotl(st, inode);
+ v9fs_stat2inode_dotl(st, inode, 0);
v9fs_cache_inode_get_cookie(inode);
retval = v9fs_get_acl(inode, fid);
if (retval)
@@ -498,7 +498,7 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
if (IS_ERR(st))
return PTR_ERR(st);
- v9fs_stat2inode_dotl(st, d_inode(dentry));
+ v9fs_stat2inode_dotl(st, d_inode(dentry), 0);
generic_fillattr(d_inode(dentry), stat);
/* Change block size to what the server returned */
stat->blksize = st->st_blksize;
@@ -609,11 +609,13 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
* v9fs_stat2inode_dotl - populate an inode structure with stat info
* @stat: stat structure
* @inode: inode to populate
+ * @flags: ctrl flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE)
*
*/
void
-v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
+v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
+ unsigned int flags)
{
umode_t mode;
struct v9fs_inode *v9inode = V9FS_I(inode);
@@ -633,7 +635,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
mode |= inode->i_mode & ~S_IALLUGO;
inode->i_mode = mode;
- i_size_write(inode, stat->st_size);
+ if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE))
+ v9fs_i_size_write(inode, stat->st_size);
inode->i_blocks = stat->st_blocks;
} else {
if (stat->st_result_mask & P9_STATS_ATIME) {
@@ -663,8 +666,9 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
}
if (stat->st_result_mask & P9_STATS_RDEV)
inode->i_rdev = new_decode_dev(stat->st_rdev);
- if (stat->st_result_mask & P9_STATS_SIZE)
- i_size_write(inode, stat->st_size);
+ if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) &&
+ stat->st_result_mask & P9_STATS_SIZE)
+ v9fs_i_size_write(inode, stat->st_size);
if (stat->st_result_mask & P9_STATS_BLOCKS)
inode->i_blocks = stat->st_blocks;
}
@@ -926,9 +930,9 @@ v9fs_vfs_follow_link_dotl(struct dentry *dentry, void **cookie)
int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
{
- loff_t i_size;
struct p9_stat_dotl *st;
struct v9fs_session_info *v9ses;
+ unsigned int flags;
v9ses = v9fs_inode2v9ses(inode);
st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
@@ -940,16 +944,13 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
goto out;
- spin_lock(&inode->i_lock);
/*
* We don't want to refresh inode->i_size,
* because we may have cached data
*/
- i_size = inode->i_size;
- v9fs_stat2inode_dotl(st, inode);
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
- inode->i_size = i_size;
- spin_unlock(&inode->i_lock);
+ flags = (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) ?
+ V9FS_STAT2INODE_KEEP_ISIZE : 0;
+ v9fs_stat2inode_dotl(st, inode, flags);
out:
kfree(st);
return 0;
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index bf495cedec26..ccf935d9e722 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -165,7 +165,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
goto release_sb;
}
d_inode(root)->i_ino = v9fs_qid2ino(&st->qid);
- v9fs_stat2inode_dotl(st, d_inode(root));
+ v9fs_stat2inode_dotl(st, d_inode(root), 0);
kfree(st);
} else {
struct p9_wstat *st = NULL;
@@ -176,7 +176,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
}
d_inode(root)->i_ino = v9fs_qid2ino(&st->qid);
- v9fs_stat2inode(st, d_inode(root), sb);
+ v9fs_stat2inode(st, d_inode(root), sb, 0);
p9stat_free(st);
kfree(st);
diff --git a/fs/aio.c b/fs/aio.c
index c283eb03cb38..7187d03aa0bc 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -40,6 +40,7 @@
#include <linux/ramfs.h>
#include <linux/percpu-refcount.h>
#include <linux/mount.h>
+#include <linux/nospec.h>
#include <asm/kmap_types.h>
#include <asm/uaccess.h>
@@ -1063,6 +1064,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
if (!table || id >= table->nr)
goto out;
+ id = array_index_nospec(id, table->nr);
ctx = rcu_dereference(table->table[id]);
if (ctx && ctx->user_id == ctx_id) {
if (percpu_ref_tryget_live(&ctx->users))
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 7a5a598a2d94..0d8b9c4f27f2 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -560,7 +560,6 @@ int autofs4_expire_run(struct super_block *sb,
pkt.len = dentry->d_name.len;
memcpy(pkt.name, dentry->d_name.name, pkt.len);
pkt.name[pkt.len] = '\0';
- dput(dentry);
if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) )
ret = -EFAULT;
@@ -573,6 +572,8 @@ int autofs4_expire_run(struct super_block *sb,
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
+ dput(dentry);
+
return ret;
}
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 1132fe71b312..0fd472d67029 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -255,8 +255,10 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
}
root_inode = autofs4_get_inode(s, S_IFDIR | 0755);
root = d_make_root(root_inode);
- if (!root)
+ if (!root) {
+ ret = -ENOMEM;
goto fail_ino;
+ }
pipe = NULL;
root->d_fsdata = ino;
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 6d1d0b93b1aa..c792df826e12 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -9,7 +9,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
- uuid-tree.o props.o hash.o
+ uuid-tree.o props.o hash.o tree-checker.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 38ee08675468..8f4baa3cb992 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1726,20 +1726,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
return err;
}
-/*
- * The leaf data grows from end-to-front in the node.
- * this returns the address of the start of the last item,
- * which is the stop of the leaf data stack
- */
-static inline unsigned int leaf_data_end(struct btrfs_root *root,
- struct extent_buffer *leaf)
-{
- u32 nr = btrfs_header_nritems(leaf);
- if (nr == 0)
- return BTRFS_LEAF_DATA_SIZE(root);
- return btrfs_item_offset_nr(leaf, nr - 1);
-}
-
/*
* search for key in the extent_buffer. The items start at offset p,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e847573c6db0..4a91d3119e59 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -35,6 +35,7 @@
#include <linux/btrfs.h>
#include <linux/workqueue.h>
#include <linux/security.h>
+#include <linux/sizes.h>
#include "extent_io.h"
#include "extent_map.h"
#include "async-thread.h"
@@ -897,6 +898,7 @@ struct btrfs_balance_item {
#define BTRFS_FILE_EXTENT_INLINE 0
#define BTRFS_FILE_EXTENT_REG 1
#define BTRFS_FILE_EXTENT_PREALLOC 2
+#define BTRFS_FILE_EXTENT_TYPES 2
struct btrfs_file_extent_item {
/*
@@ -2283,7 +2285,7 @@ do { \
#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31)
struct btrfs_map_token {
- struct extent_buffer *eb;
+ const struct extent_buffer *eb;
char *kaddr;
unsigned long offset;
};
@@ -2314,18 +2316,19 @@ static inline void btrfs_init_map_token (struct btrfs_map_token *token)
sizeof(((type *)0)->member)))
#define DECLARE_BTRFS_SETGET_BITS(bits) \
-u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \
- unsigned long off, \
- struct btrfs_map_token *token); \
-void btrfs_set_token_##bits(struct extent_buffer *eb, void *ptr, \
+u##bits btrfs_get_token_##bits(const struct extent_buffer *eb, \
+ const void *ptr, unsigned long off, \
+ struct btrfs_map_token *token); \
+void btrfs_set_token_##bits(struct extent_buffer *eb, const void *ptr, \
unsigned long off, u##bits val, \
struct btrfs_map_token *token); \
-static inline u##bits btrfs_get_##bits(struct extent_buffer *eb, void *ptr, \
+static inline u##bits btrfs_get_##bits(const struct extent_buffer *eb, \
+ const void *ptr, \
unsigned long off) \
{ \
return btrfs_get_token_##bits(eb, ptr, off, NULL); \
} \
-static inline void btrfs_set_##bits(struct extent_buffer *eb, void *ptr, \
+static inline void btrfs_set_##bits(struct extent_buffer *eb, void *ptr,\
unsigned long off, u##bits val) \
{ \
btrfs_set_token_##bits(eb, ptr, off, val, NULL); \
@@ -2337,7 +2340,8 @@ DECLARE_BTRFS_SETGET_BITS(32)
DECLARE_BTRFS_SETGET_BITS(64)
#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
-static inline u##bits btrfs_##name(struct extent_buffer *eb, type *s) \
+static inline u##bits btrfs_##name(const struct extent_buffer *eb, \
+ const type *s) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
return btrfs_get_##bits(eb, s, offsetof(type, member)); \
@@ -2348,7 +2352,8 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, type *s, \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
btrfs_set_##bits(eb, s, offsetof(type, member), val); \
} \
-static inline u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, \
+static inline u##bits btrfs_token_##name(const struct extent_buffer *eb,\
+ const type *s, \
struct btrfs_map_token *token) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
@@ -2363,9 +2368,9 @@ static inline void btrfs_set_token_##name(struct extent_buffer *eb, \
}
#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
-static inline u##bits btrfs_##name(struct extent_buffer *eb) \
+static inline u##bits btrfs_##name(const struct extent_buffer *eb) \
{ \
- type *p = page_address(eb->pages[0]); \
+ const type *p = page_address(eb->pages[0]); \
u##bits res = le##bits##_to_cpu(p->member); \
return res; \
} \
@@ -2377,7 +2382,7 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \
}
#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \
-static inline u##bits btrfs_##name(type *s) \
+static inline u##bits btrfs_##name(const type *s) \
{ \
return le##bits##_to_cpu(s->member); \
} \
@@ -2678,7 +2683,7 @@ static inline unsigned long btrfs_node_key_ptr_offset(int nr)
sizeof(struct btrfs_key_ptr) * nr;
}
-void btrfs_node_key(struct extent_buffer *eb,
+void btrfs_node_key(const struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr);
static inline void btrfs_set_node_key(struct extent_buffer *eb,
@@ -2707,28 +2712,28 @@ static inline struct btrfs_item *btrfs_item_nr(int nr)
return (struct btrfs_item *)btrfs_item_nr_offset(nr);
}
-static inline u32 btrfs_item_end(struct extent_buffer *eb,
+static inline u32 btrfs_item_end(const struct extent_buffer *eb,
struct btrfs_item *item)
{
return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item);
}
-static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr)
+static inline u32 btrfs_item_end_nr(const struct extent_buffer *eb, int nr)
{
return btrfs_item_end(eb, btrfs_item_nr(nr));
}
-static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr)
+static inline u32 btrfs_item_offset_nr(const struct extent_buffer *eb, int nr)
{
return btrfs_item_offset(eb, btrfs_item_nr(nr));
}
-static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr)
+static inline u32 btrfs_item_size_nr(const struct extent_buffer *eb, int nr)
{
return btrfs_item_size(eb, btrfs_item_nr(nr));
}
-static inline void btrfs_item_key(struct extent_buffer *eb,
+static inline void btrfs_item_key(const struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
{
struct btrfs_item *item = btrfs_item_nr(nr);
@@ -2764,8 +2769,8 @@ BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item,
BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item,
transid, 64);
-static inline void btrfs_dir_item_key(struct extent_buffer *eb,
- struct btrfs_dir_item *item,
+static inline void btrfs_dir_item_key(const struct extent_buffer *eb,
+ const struct btrfs_dir_item *item,
struct btrfs_disk_key *key)
{
read_eb_member(eb, item, struct btrfs_dir_item, location, key);
@@ -2773,7 +2778,7 @@ static inline void btrfs_dir_item_key(struct extent_buffer *eb,
static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
struct btrfs_dir_item *item,
- struct btrfs_disk_key *key)
+ const struct btrfs_disk_key *key)
{
write_eb_member(eb, item, struct btrfs_dir_item, location, key);
}
@@ -2785,8 +2790,8 @@ BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
generation, 64);
-static inline void btrfs_free_space_key(struct extent_buffer *eb,
- struct btrfs_free_space_header *h,
+static inline void btrfs_free_space_key(const struct extent_buffer *eb,
+ const struct btrfs_free_space_header *h,
struct btrfs_disk_key *key)
{
read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
@@ -2794,7 +2799,7 @@ static inline void btrfs_free_space_key(struct extent_buffer *eb,
static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
struct btrfs_free_space_header *h,
- struct btrfs_disk_key *key)
+ const struct btrfs_disk_key *key)
{
write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
}
@@ -2821,25 +2826,25 @@ static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk,
disk->objectid = cpu_to_le64(cpu->objectid);
}
-static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb,
- struct btrfs_key *key, int nr)
+static inline void btrfs_node_key_to_cpu(const struct extent_buffer *eb,
+ struct btrfs_key *key, int nr)
{
struct btrfs_disk_key disk_key;
btrfs_node_key(eb, &disk_key, nr);
btrfs_disk_key_to_cpu(key, &disk_key);
}
-static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb,
- struct btrfs_key *key, int nr)
+static inline void btrfs_item_key_to_cpu(const struct extent_buffer *eb,
+ struct btrfs_key *key, int nr)
{
struct btrfs_disk_key disk_key;
btrfs_item_key(eb, &disk_key, nr);
btrfs_disk_key_to_cpu(key, &disk_key);
}
-static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb,
- struct btrfs_dir_item *item,
- struct btrfs_key *key)
+static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb,
+ const struct btrfs_dir_item *item,
+ struct btrfs_key *key)
{
struct btrfs_disk_key disk_key;
btrfs_dir_item_key(eb, item, &disk_key);
@@ -2872,7 +2877,7 @@ BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header,
nritems, 32);
BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64);
-static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag)
+static inline int btrfs_header_flag(const struct extent_buffer *eb, u64 flag)
{
return (btrfs_header_flags(eb) & flag) == flag;
}
@@ -2891,7 +2896,7 @@ static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
return (flags & flag) == flag;
}
-static inline int btrfs_header_backref_rev(struct extent_buffer *eb)
+static inline int btrfs_header_backref_rev(const struct extent_buffer *eb)
{
u64 flags = btrfs_header_flags(eb);
return flags >> BTRFS_BACKREF_REV_SHIFT;
@@ -2911,12 +2916,12 @@ static inline unsigned long btrfs_header_fsid(void)
return offsetof(struct btrfs_header, fsid);
}
-static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
+static inline unsigned long btrfs_header_chunk_tree_uuid(const struct extent_buffer *eb)
{
return offsetof(struct btrfs_header, chunk_tree_uuid);
}
-static inline int btrfs_is_leaf(struct extent_buffer *eb)
+static inline int btrfs_is_leaf(const struct extent_buffer *eb)
{
return btrfs_header_level(eb) == 0;
}
@@ -2950,12 +2955,12 @@ BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item,
BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item,
rtransid, 64);
-static inline bool btrfs_root_readonly(struct btrfs_root *root)
+static inline bool btrfs_root_readonly(const struct btrfs_root *root)
{
return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0;
}
-static inline bool btrfs_root_dead(struct btrfs_root *root)
+static inline bool btrfs_root_dead(const struct btrfs_root *root)
{
return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0;
}
@@ -3012,51 +3017,51 @@ BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
/* struct btrfs_balance_item */
BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
-static inline void btrfs_balance_data(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
+static inline void btrfs_balance_data(const struct extent_buffer *eb,
+ const struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
read_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
}
static inline void btrfs_set_balance_data(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
- struct btrfs_disk_balance_args *ba)
+ struct btrfs_balance_item *bi,
+ const struct btrfs_disk_balance_args *ba)
{
write_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
}
-static inline void btrfs_balance_meta(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
+static inline void btrfs_balance_meta(const struct extent_buffer *eb,
+ const struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
}
static inline void btrfs_set_balance_meta(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
- struct btrfs_disk_balance_args *ba)
+ struct btrfs_balance_item *bi,
+ const struct btrfs_disk_balance_args *ba)
{
write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
}
-static inline void btrfs_balance_sys(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
+static inline void btrfs_balance_sys(const struct extent_buffer *eb,
+ const struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
}
static inline void btrfs_set_balance_sys(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
- struct btrfs_disk_balance_args *ba)
+ struct btrfs_balance_item *bi,
+ const struct btrfs_disk_balance_args *ba)
{
write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
}
static inline void
btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
- struct btrfs_disk_balance_args *disk)
+ const struct btrfs_disk_balance_args *disk)
{
memset(cpu, 0, sizeof(*cpu));
@@ -3076,7 +3081,7 @@ btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
static inline void
btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
- struct btrfs_balance_args *cpu)
+ const struct btrfs_balance_args *cpu)
{
memset(disk, 0, sizeof(*disk));
@@ -3144,7 +3149,7 @@ BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
uuid_tree_generation, 64);
-static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
+static inline int btrfs_super_csum_size(const struct btrfs_super_block *s)
{
u16 t = btrfs_super_csum_type(s);
/*
@@ -3158,6 +3163,21 @@ static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
return offsetof(struct btrfs_leaf, items);
}
+/*
+ * The leaf data grows from end-to-front in the node.
+ * this returns the address of the start of the last item,
+ * which is the stop of the leaf data stack
+ */
+static inline unsigned int leaf_data_end(const struct btrfs_root *root,
+ const struct extent_buffer *leaf)
+{
+ u32 nr = btrfs_header_nritems(leaf);
+
+ if (nr == 0)
+ return BTRFS_LEAF_DATA_SIZE(root);
+ return btrfs_item_offset_nr(leaf, nr - 1);
+}
+
/* struct btrfs_file_extent_item */
BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr,
@@ -3174,7 +3194,7 @@ BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression,
struct btrfs_file_extent_item, compression, 8);
static inline unsigned long
-btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e)
+btrfs_file_extent_inline_start(const struct btrfs_file_extent_item *e)
{
return (unsigned long)e + BTRFS_FILE_EXTENT_INLINE_DATA_START;
}
@@ -3208,8 +3228,9 @@ BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item,
* size of any extent headers. If a file is compressed on disk, this is
* the compressed size
*/
-static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
- struct btrfs_item *e)
+static inline u32 btrfs_file_extent_inline_item_len(
+ const struct extent_buffer *eb,
+ struct btrfs_item *e)
{
return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START;
}
@@ -3217,9 +3238,9 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
/* this returns the number of file bytes represented by the inline item.
* If an item is compressed, this is the uncompressed size
*/
-static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
- int slot,
- struct btrfs_file_extent_item *fi)
+static inline u32 btrfs_file_extent_inline_len(const struct extent_buffer *eb,
+ int slot,
+ const struct btrfs_file_extent_item *fi)
{
struct btrfs_map_token token;
@@ -3241,8 +3262,8 @@ static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
/* btrfs_dev_stats_item */
-static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb,
- struct btrfs_dev_stats_item *ptr,
+static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
+ const struct btrfs_dev_stats_item *ptr,
int index)
{
u64 val;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 176a27bc63aa..81e5bc62e8e3 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -620,7 +620,7 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
em = lookup_extent_mapping(em_tree, start, (u64)-1);
if (!em)
break;
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++)
if (srcdev == map->stripes[i].dev)
map->stripes[i].dev = tgtdev;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1f21c6c33228..78722aaffecd 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -49,6 +49,7 @@
#include "raid56.h"
#include "sysfs.h"
#include "qgroup.h"
+#include "tree-checker.h"
#ifdef CONFIG_X86
#include <asm/cpufeature.h>
@@ -522,72 +523,6 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
return ret;
}
-#define CORRUPT(reason, eb, root, slot) \
- btrfs_crit(root->fs_info, "corrupt leaf, %s: block=%llu," \
- "root=%llu, slot=%d", reason, \
- btrfs_header_bytenr(eb), root->objectid, slot)
-
-static noinline int check_leaf(struct btrfs_root *root,
- struct extent_buffer *leaf)
-{
- struct btrfs_key key;
- struct btrfs_key leaf_key;
- u32 nritems = btrfs_header_nritems(leaf);
- int slot;
-
- if (nritems == 0)
- return 0;
-
- /* Check the 0 item */
- if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
- BTRFS_LEAF_DATA_SIZE(root)) {
- CORRUPT("invalid item offset size pair", leaf, root, 0);
- return -EIO;
- }
-
- /*
- * Check to make sure each items keys are in the correct order and their
- * offsets make sense. We only have to loop through nritems-1 because
- * we check the current slot against the next slot, which verifies the
- * next slot's offset+size makes sense and that the current's slot
- * offset is correct.
- */
- for (slot = 0; slot < nritems - 1; slot++) {
- btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
- btrfs_item_key_to_cpu(leaf, &key, slot + 1);
-
- /* Make sure the keys are in the right order */
- if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
- CORRUPT("bad key order", leaf, root, slot);
- return -EIO;
- }
-
- /*
- * Make sure the offset and ends are right, remember that the
- * item data starts at the end of the leaf and grows towards the
- * front.
- */
- if (btrfs_item_offset_nr(leaf, slot) !=
- btrfs_item_end_nr(leaf, slot + 1)) {
- CORRUPT("slot offset bad", leaf, root, slot);
- return -EIO;
- }
-
- /*
- * Check to make sure that we don't point outside of the leaf,
- * just incase all the items are consistent to eachother, but
- * all point outside of the leaf.
- */
- if (btrfs_item_end_nr(leaf, slot) >
- BTRFS_LEAF_DATA_SIZE(root)) {
- CORRUPT("slot end outside of leaf", leaf, root, slot);
- return -EIO;
- }
- }
-
- return 0;
-}
-
static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
u64 phy_offset, struct page *page,
u64 start, u64 end, int mirror)
@@ -654,11 +589,14 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
* that we don't try and read the other copies of this block, just
* return -EIO.
*/
- if (found_level == 0 && check_leaf(root, eb)) {
+ if (found_level == 0 && btrfs_check_leaf_full(root, eb)) {
set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = -EIO;
}
+ if (found_level > 0 && btrfs_check_node(root, eb))
+ ret = -EIO;
+
if (!ret)
set_extent_buffer_uptodate(eb);
err:
@@ -3958,7 +3896,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
buf->len,
root->fs_info->dirty_metadata_batch);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
- if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
+ /*
+ * Since btrfs_mark_buffer_dirty() can be called with item pointer set
+ * but item data not updated.
+ * So here we should only check item pointers, not item data.
+ */
+ if (btrfs_header_level(buf) == 0 &&
+ btrfs_check_leaf_relaxed(root, buf)) {
btrfs_print_leaf(root, buf);
ASSERT(0);
}
@@ -4167,6 +4111,14 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
spin_lock(&fs_info->ordered_root_lock);
}
spin_unlock(&fs_info->ordered_root_lock);
+
+ /*
+ * We need this here because if we've been flipped read-only we won't
+ * get sync() from the umount, so we need to make sure any ordered
+ * extents that haven't had their dirty pages IO start writeout yet
+ * actually get run and error out properly.
+ */
+ btrfs_wait_ordered_roots(fs_info, -1);
}
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 13ff0fdae03e..978bbfed5a2c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2342,7 +2342,13 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
ins.type = BTRFS_EXTENT_ITEM_KEY;
}
- BUG_ON(node->ref_mod != 1);
+ if (node->ref_mod != 1) {
+ btrfs_err(root->fs_info,
+ "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
+ node->bytenr, node->ref_mod, node->action, ref_root,
+ parent);
+ return -EIO;
+ }
if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
BUG_ON(!extent_op || !extent_op->update_flags);
ret = alloc_reserved_tree_block(trans, root,
@@ -9481,6 +9487,8 @@ static int find_first_block_group(struct btrfs_root *root,
int ret = 0;
struct btrfs_key found_key;
struct extent_buffer *leaf;
+ struct btrfs_block_group_item bg;
+ u64 flags;
int slot;
ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
@@ -9502,7 +9510,47 @@ static int find_first_block_group(struct btrfs_root *root,
if (found_key.objectid >= key->objectid &&
found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
- ret = 0;
+ struct extent_map_tree *em_tree;
+ struct extent_map *em;
+
+ em_tree = &root->fs_info->mapping_tree.map_tree;
+ read_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, found_key.objectid,
+ found_key.offset);
+ read_unlock(&em_tree->lock);
+ if (!em) {
+ btrfs_err(root->fs_info,
+ "logical %llu len %llu found bg but no related chunk",
+ found_key.objectid, found_key.offset);
+ ret = -ENOENT;
+ } else if (em->start != found_key.objectid ||
+ em->len != found_key.offset) {
+ btrfs_err(root->fs_info,
+ "block group %llu len %llu mismatch with chunk %llu len %llu",
+ found_key.objectid, found_key.offset,
+ em->start, em->len);
+ ret = -EUCLEAN;
+ } else {
+ read_extent_buffer(leaf, &bg,
+ btrfs_item_ptr_offset(leaf, slot),
+ sizeof(bg));
+ flags = btrfs_block_group_flags(&bg) &
+ BTRFS_BLOCK_GROUP_TYPE_MASK;
+
+ if (flags != (em->map_lookup->type &
+ BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+ btrfs_err(root->fs_info,
+"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
+ found_key.objectid,
+ found_key.offset, flags,
+ (BTRFS_BLOCK_GROUP_TYPE_MASK &
+ em->map_lookup->type));
+ ret = -EUCLEAN;
+ } else {
+ ret = 0;
+ }
+ }
+ free_extent_map(em);
goto out;
}
path->slots[0]++;
@@ -9717,6 +9765,62 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
return cache;
}
+
+/*
+ * Iterate all chunks and verify that each of them has the corresponding block
+ * group
+ */
+static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+ struct extent_map *em;
+ struct btrfs_block_group_cache *bg;
+ u64 start = 0;
+ int ret = 0;
+
+ while (1) {
+ read_lock(&map_tree->map_tree.lock);
+ /*
+ * lookup_extent_mapping will return the first extent map
+ * intersecting the range, so setting @len to 1 is enough to
+ * get the first chunk.
+ */
+ em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
+ read_unlock(&map_tree->map_tree.lock);
+ if (!em)
+ break;
+
+ bg = btrfs_lookup_block_group(fs_info, em->start);
+ if (!bg) {
+ btrfs_err(fs_info,
+ "chunk start=%llu len=%llu doesn't have corresponding block group",
+ em->start, em->len);
+ ret = -EUCLEAN;
+ free_extent_map(em);
+ break;
+ }
+ if (bg->key.objectid != em->start ||
+ bg->key.offset != em->len ||
+ (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
+ (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+ btrfs_err(fs_info,
+"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
+ em->start, em->len,
+ em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
+ bg->key.objectid, bg->key.offset,
+ bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
+ ret = -EUCLEAN;
+ free_extent_map(em);
+ btrfs_put_block_group(bg);
+ break;
+ }
+ start = em->start + em->len;
+ free_extent_map(em);
+ btrfs_put_block_group(bg);
+ }
+ return ret;
+}
+
int btrfs_read_block_groups(struct btrfs_root *root)
{
struct btrfs_path *path;
@@ -9903,7 +10007,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
}
init_global_block_rsv(info);
- ret = 0;
+ ret = check_chunk_block_group_mappings(info);
error:
btrfs_free_path(path);
return ret;
@@ -10388,7 +10492,7 @@ btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
* more device items and remove one chunk item), but this is done at
* btrfs_remove_chunk() through a call to check_system_chunk().
*/
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
num_items = 3 + map->num_stripes;
free_extent_map(em);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 88bee6703cc0..a18f558b4477 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3106,11 +3106,11 @@ static int __do_readpage(struct extent_io_tree *tree,
*/
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
prev_em_start && *prev_em_start != (u64)-1 &&
- *prev_em_start != em->orig_start)
+ *prev_em_start != em->start)
force_bio_submit = true;
if (prev_em_start)
- *prev_em_start = em->orig_start;
+ *prev_em_start = em->start;
free_extent_map(em);
em = NULL;
@@ -3847,8 +3847,10 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
struct block_device *bdev = fs_info->fs_devices->latest_bdev;
struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
u64 offset = eb->start;
+ u32 nritems;
unsigned long i, num_pages;
unsigned long bio_flags = 0;
+ unsigned long start, end;
int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META;
int ret = 0;
@@ -3858,6 +3860,23 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
bio_flags = EXTENT_BIO_TREE_LOG;
+ /* set btree blocks beyond nritems with 0 to avoid stale content. */
+ nritems = btrfs_header_nritems(eb);
+ if (btrfs_header_level(eb) > 0) {
+ end = btrfs_node_key_ptr_offset(nritems);
+
+ memset_extent_buffer(eb, 0, end, eb->len - end);
+ } else {
+ /*
+ * leaf:
+ * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
+ */
+ start = btrfs_item_nr_offset(nritems);
+ end = btrfs_leaf_data(eb) +
+ leaf_data_end(fs_info->tree_root, eb);
+ memset_extent_buffer(eb, 0, start, end - start);
+ }
+
for (i = 0; i < num_pages; i++) {
struct page *p = eb->pages[i];
@@ -5362,9 +5381,8 @@ unlock_exit:
return ret;
}
-void read_extent_buffer(struct extent_buffer *eb, void *dstv,
- unsigned long start,
- unsigned long len)
+void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
+ unsigned long start, unsigned long len)
{
size_t cur;
size_t offset;
@@ -5393,9 +5411,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
}
}
-int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
- unsigned long start,
- unsigned long len)
+int read_extent_buffer_to_user(const struct extent_buffer *eb,
+ void __user *dstv,
+ unsigned long start, unsigned long len)
{
size_t cur;
size_t offset;
@@ -5430,10 +5448,10 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
return ret;
}
-int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
- unsigned long min_len, char **map,
- unsigned long *map_start,
- unsigned long *map_len)
+int map_private_extent_buffer(const struct extent_buffer *eb,
+ unsigned long start, unsigned long min_len,
+ char **map, unsigned long *map_start,
+ unsigned long *map_len)
{
size_t offset = start & (PAGE_CACHE_SIZE - 1);
char *kaddr;
@@ -5468,9 +5486,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
return 0;
}
-int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
- unsigned long start,
- unsigned long len)
+int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
+ unsigned long start, unsigned long len)
{
size_t cur;
size_t offset;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index f4c1ae11855f..751435967724 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -308,14 +308,13 @@ static inline void extent_buffer_get(struct extent_buffer *eb)
atomic_inc(&eb->refs);
}
-int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
- unsigned long start,
- unsigned long len);
-void read_extent_buffer(struct extent_buffer *eb, void *dst,
+int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
+ unsigned long start, unsigned long len);
+void read_extent_buffer(const struct extent_buffer *eb, void *dst,
unsigned long start,
unsigned long len);
-int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst,
- unsigned long start,
+int read_extent_buffer_to_user(const struct extent_buffer *eb,
+ void __user *dst, unsigned long start,
unsigned long len);
void write_extent_buffer(struct extent_buffer *eb, const void *src,
unsigned long start, unsigned long len);
@@ -334,10 +333,10 @@ int set_extent_buffer_uptodate(struct extent_buffer *eb);
int clear_extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_under_io(struct extent_buffer *eb);
-int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
- unsigned long min_len, char **map,
- unsigned long *map_start,
- unsigned long *map_len);
+int map_private_extent_buffer(const struct extent_buffer *eb,
+ unsigned long offset, unsigned long min_len,
+ char **map, unsigned long *map_start,
+ unsigned long *map_len);
int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 6a98bddd8f33..84fb56d5c018 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -76,7 +76,7 @@ void free_extent_map(struct extent_map *em)
WARN_ON(extent_map_in_tree(em));
WARN_ON(!list_empty(&em->list));
if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
- kfree(em->bdev);
+ kfree(em->map_lookup);
kmem_cache_free(extent_map_cache, em);
}
}
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index b2991fd8583e..eb8b8fae036b 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -32,7 +32,15 @@ struct extent_map {
u64 block_len;
u64 generation;
unsigned long flags;
- struct block_device *bdev;
+ union {
+ struct block_device *bdev;
+
+ /*
+ * used for chunk mappings
+ * flags & EXTENT_FLAG_FS_MAPPING must be set
+ */
+ struct map_lookup *map_lookup;
+ };
atomic_t refs;
unsigned int compress_type;
struct list_head list;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6dca9f937bf6..cc9ccc42f469 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3460,7 +3460,7 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
return ret;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (em->start != chunk_offset)
goto out;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 83c73738165e..40d1ab957fb6 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -3232,7 +3232,8 @@ static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
kfree(m);
}
-static void tail_append_pending_moves(struct pending_dir_move *moves,
+static void tail_append_pending_moves(struct send_ctx *sctx,
+ struct pending_dir_move *moves,
struct list_head *stack)
{
if (list_empty(&moves->list)) {
@@ -3243,6 +3244,10 @@ static void tail_append_pending_moves(struct pending_dir_move *moves,
list_add_tail(&moves->list, stack);
list_splice_tail(&list, stack);
}
+ if (!RB_EMPTY_NODE(&moves->node)) {
+ rb_erase(&moves->node, &sctx->pending_dir_moves);
+ RB_CLEAR_NODE(&moves->node);
+ }
}
static int apply_children_dir_moves(struct send_ctx *sctx)
@@ -3257,7 +3262,7 @@ static int apply_children_dir_moves(struct send_ctx *sctx)
return 0;
INIT_LIST_HEAD(&stack);
- tail_append_pending_moves(pm, &stack);
+ tail_append_pending_moves(sctx, pm, &stack);
while (!list_empty(&stack)) {
pm = list_first_entry(&stack, struct pending_dir_move, list);
@@ -3268,7 +3273,7 @@ static int apply_children_dir_moves(struct send_ctx *sctx)
goto out;
pm = get_pending_dir_moves(sctx, parent_ino);
if (pm)
- tail_append_pending_moves(pm, &stack);
+ tail_append_pending_moves(sctx, pm, &stack);
}
return 0;
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index b976597b0721..63ffd213b0b7 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -50,8 +50,8 @@ static inline void put_unaligned_le8(u8 val, void *p)
*/
#define DEFINE_BTRFS_SETGET_BITS(bits) \
-u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \
- unsigned long off, \
+u##bits btrfs_get_token_##bits(const struct extent_buffer *eb, \
+ const void *ptr, unsigned long off, \
struct btrfs_map_token *token) \
{ \
unsigned long part_offset = (unsigned long)ptr; \
@@ -90,7 +90,8 @@ u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \
return res; \
} \
void btrfs_set_token_##bits(struct extent_buffer *eb, \
- void *ptr, unsigned long off, u##bits val, \
+ const void *ptr, unsigned long off, \
+ u##bits val, \
struct btrfs_map_token *token) \
{ \
unsigned long part_offset = (unsigned long)ptr; \
@@ -133,7 +134,7 @@ DEFINE_BTRFS_SETGET_BITS(16)
DEFINE_BTRFS_SETGET_BITS(32)
DEFINE_BTRFS_SETGET_BITS(64)
-void btrfs_node_key(struct extent_buffer *eb,
+void btrfs_node_key(const struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
{
unsigned long ptr = btrfs_node_key_ptr_offset(nr);
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
new file mode 100644
index 000000000000..5b98f3c76ce4
--- /dev/null
+++ b/fs/btrfs/tree-checker.c
@@ -0,0 +1,649 @@
+/*
+ * Copyright (C) Qu Wenruo 2017. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program.
+ */
+
+/*
+ * The module is used to catch unexpected/corrupted tree block data.
+ * Such behavior can be caused either by a fuzzed image or bugs.
+ *
+ * The objective is to do leaf/node validation checks when tree block is read
+ * from disk, and check *every* possible member, so other code won't
+ * need to checking them again.
+ *
+ * Due to the potential and unwanted damage, every checker needs to be
+ * carefully reviewed otherwise so it does not prevent mount of valid images.
+ */
+
+#include "ctree.h"
+#include "tree-checker.h"
+#include "disk-io.h"
+#include "compression.h"
+#include "hash.h"
+#include "volumes.h"
+
+#define CORRUPT(reason, eb, root, slot) \
+ btrfs_crit(root->fs_info, \
+ "corrupt %s, %s: block=%llu, root=%llu, slot=%d", \
+ btrfs_header_level(eb) == 0 ? "leaf" : "node", \
+ reason, btrfs_header_bytenr(eb), root->objectid, slot)
+
+/*
+ * Error message should follow the following format:
+ * corrupt <type>: <identifier>, <reason>[, <bad_value>]
+ *
+ * @type: leaf or node
+ * @identifier: the necessary info to locate the leaf/node.
+ * It's recommened to decode key.objecitd/offset if it's
+ * meaningful.
+ * @reason: describe the error
+ * @bad_value: optional, it's recommened to output bad value and its
+ * expected value (range).
+ *
+ * Since comma is used to separate the components, only space is allowed
+ * inside each component.
+ */
+
+/*
+ * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
+ * Allows callers to customize the output.
+ */
+__printf(4, 5)
+static void generic_err(const struct btrfs_root *root,
+ const struct extent_buffer *eb, int slot,
+ const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ btrfs_crit(root->fs_info,
+ "corrupt %s: root=%llu block=%llu slot=%d, %pV",
+ btrfs_header_level(eb) == 0 ? "leaf" : "node",
+ root->objectid, btrfs_header_bytenr(eb), slot, &vaf);
+ va_end(args);
+}
+
+static int check_extent_data_item(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ struct btrfs_file_extent_item *fi;
+ u32 sectorsize = root->sectorsize;
+ u32 item_size = btrfs_item_size_nr(leaf, slot);
+
+ if (!IS_ALIGNED(key->offset, sectorsize)) {
+ CORRUPT("unaligned key offset for file extent",
+ leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
+ CORRUPT("invalid file extent type", leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ /*
+ * Support for new compression/encrption must introduce incompat flag,
+ * and must be caught in open_ctree().
+ */
+ if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
+ CORRUPT("invalid file extent compression", leaf, root, slot);
+ return -EUCLEAN;
+ }
+ if (btrfs_file_extent_encryption(leaf, fi)) {
+ CORRUPT("invalid file extent encryption", leaf, root, slot);
+ return -EUCLEAN;
+ }
+ if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
+ /* Inline extent must have 0 as key offset */
+ if (key->offset) {
+ CORRUPT("inline extent has non-zero key offset",
+ leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ /* Compressed inline extent has no on-disk size, skip it */
+ if (btrfs_file_extent_compression(leaf, fi) !=
+ BTRFS_COMPRESS_NONE)
+ return 0;
+
+ /* Uncompressed inline extent size must match item size */
+ if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
+ btrfs_file_extent_ram_bytes(leaf, fi)) {
+ CORRUPT("plaintext inline extent has invalid size",
+ leaf, root, slot);
+ return -EUCLEAN;
+ }
+ return 0;
+ }
+
+ /* Regular or preallocated extent has fixed item size */
+ if (item_size != sizeof(*fi)) {
+ CORRUPT(
+ "regluar or preallocated extent data item size is invalid",
+ leaf, root, slot);
+ return -EUCLEAN;
+ }
+ if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) ||
+ !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) ||
+ !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) ||
+ !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) ||
+ !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) {
+ CORRUPT(
+ "regular or preallocated extent data item has unaligned value",
+ leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ return 0;
+}
+
+static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ u32 sectorsize = root->sectorsize;
+ u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy);
+
+ if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
+ CORRUPT("invalid objectid for csum item", leaf, root, slot);
+ return -EUCLEAN;
+ }
+ if (!IS_ALIGNED(key->offset, sectorsize)) {
+ CORRUPT("unaligned key offset for csum item", leaf, root, slot);
+ return -EUCLEAN;
+ }
+ if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
+ CORRUPT("unaligned csum item size", leaf, root, slot);
+ return -EUCLEAN;
+ }
+ return 0;
+}
+
+/*
+ * Customized reported for dir_item, only important new info is key->objectid,
+ * which represents inode number
+ */
+__printf(4, 5)
+static void dir_item_err(const struct btrfs_root *root,
+ const struct extent_buffer *eb, int slot,
+ const char *fmt, ...)
+{
+ struct btrfs_key key;
+ struct va_format vaf;
+ va_list args;
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ btrfs_crit(root->fs_info,
+ "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
+ btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid,
+ btrfs_header_bytenr(eb), slot, key.objectid, &vaf);
+ va_end(args);
+}
+
+static int check_dir_item(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ struct btrfs_dir_item *di;
+ u32 item_size = btrfs_item_size_nr(leaf, slot);
+ u32 cur = 0;
+
+ di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
+ while (cur < item_size) {
+ u32 name_len;
+ u32 data_len;
+ u32 max_name_len;
+ u32 total_size;
+ u32 name_hash;
+ u8 dir_type;
+
+ /* header itself should not cross item boundary */
+ if (cur + sizeof(*di) > item_size) {
+ dir_item_err(root, leaf, slot,
+ "dir item header crosses item boundary, have %zu boundary %u",
+ cur + sizeof(*di), item_size);
+ return -EUCLEAN;
+ }
+
+ /* dir type check */
+ dir_type = btrfs_dir_type(leaf, di);
+ if (dir_type >= BTRFS_FT_MAX) {
+ dir_item_err(root, leaf, slot,
+ "invalid dir item type, have %u expect [0, %u)",
+ dir_type, BTRFS_FT_MAX);
+ return -EUCLEAN;
+ }
+
+ if (key->type == BTRFS_XATTR_ITEM_KEY &&
+ dir_type != BTRFS_FT_XATTR) {
+ dir_item_err(root, leaf, slot,
+ "invalid dir item type for XATTR key, have %u expect %u",
+ dir_type, BTRFS_FT_XATTR);
+ return -EUCLEAN;
+ }
+ if (dir_type == BTRFS_FT_XATTR &&
+ key->type != BTRFS_XATTR_ITEM_KEY) {
+ dir_item_err(root, leaf, slot,
+ "xattr dir type found for non-XATTR key");
+ return -EUCLEAN;
+ }
+ if (dir_type == BTRFS_FT_XATTR)
+ max_name_len = XATTR_NAME_MAX;
+ else
+ max_name_len = BTRFS_NAME_LEN;
+
+ /* Name/data length check */
+ name_len = btrfs_dir_name_len(leaf, di);
+ data_len = btrfs_dir_data_len(leaf, di);
+ if (name_len > max_name_len) {
+ dir_item_err(root, leaf, slot,
+ "dir item name len too long, have %u max %u",
+ name_len, max_name_len);
+ return -EUCLEAN;
+ }
+ if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)) {
+ dir_item_err(root, leaf, slot,
+ "dir item name and data len too long, have %u max %zu",
+ name_len + data_len,
+ BTRFS_MAX_XATTR_SIZE(root));
+ return -EUCLEAN;
+ }
+
+ if (data_len && dir_type != BTRFS_FT_XATTR) {
+ dir_item_err(root, leaf, slot,
+ "dir item with invalid data len, have %u expect 0",
+ data_len);
+ return -EUCLEAN;
+ }
+
+ total_size = sizeof(*di) + name_len + data_len;
+
+ /* header and name/data should not cross item boundary */
+ if (cur + total_size > item_size) {
+ dir_item_err(root, leaf, slot,
+ "dir item data crosses item boundary, have %u boundary %u",
+ cur + total_size, item_size);
+ return -EUCLEAN;
+ }
+
+ /*
+ * Special check for XATTR/DIR_ITEM, as key->offset is name
+ * hash, should match its name
+ */
+ if (key->type == BTRFS_DIR_ITEM_KEY ||
+ key->type == BTRFS_XATTR_ITEM_KEY) {
+ char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
+
+ read_extent_buffer(leaf, namebuf,
+ (unsigned long)(di + 1), name_len);
+ name_hash = btrfs_name_hash(namebuf, name_len);
+ if (key->offset != name_hash) {
+ dir_item_err(root, leaf, slot,
+ "name hash mismatch with key, have 0x%016x expect 0x%016llx",
+ name_hash, key->offset);
+ return -EUCLEAN;
+ }
+ }
+ cur += total_size;
+ di = (struct btrfs_dir_item *)((void *)di + total_size);
+ }
+ return 0;
+}
+
+__printf(4, 5)
+__cold
+static void block_group_err(const struct btrfs_fs_info *fs_info,
+ const struct extent_buffer *eb, int slot,
+ const char *fmt, ...)
+{
+ struct btrfs_key key;
+ struct va_format vaf;
+ va_list args;
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ btrfs_crit(fs_info,
+ "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
+ btrfs_header_level(eb) == 0 ? "leaf" : "node",
+ btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
+ key.objectid, key.offset, &vaf);
+ va_end(args);
+}
+
+static int check_block_group_item(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ struct btrfs_block_group_item bgi;
+ u32 item_size = btrfs_item_size_nr(leaf, slot);
+ u64 flags;
+ u64 type;
+
+ /*
+ * Here we don't really care about alignment since extent allocator can
+ * handle it. We care more about the size, as if one block group is
+ * larger than maximum size, it's must be some obvious corruption.
+ */
+ if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid block group size, have %llu expect (0, %llu]",
+ key->offset, BTRFS_MAX_DATA_CHUNK_SIZE);
+ return -EUCLEAN;
+ }
+
+ if (item_size != sizeof(bgi)) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid item size, have %u expect %zu",
+ item_size, sizeof(bgi));
+ return -EUCLEAN;
+ }
+
+ read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
+ sizeof(bgi));
+ if (btrfs_block_group_chunk_objectid(&bgi) !=
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid block group chunk objectid, have %llu expect %llu",
+ btrfs_block_group_chunk_objectid(&bgi),
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ return -EUCLEAN;
+ }
+
+ if (btrfs_block_group_used(&bgi) > key->offset) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid block group used, have %llu expect [0, %llu)",
+ btrfs_block_group_used(&bgi), key->offset);
+ return -EUCLEAN;
+ }
+
+ flags = btrfs_block_group_flags(&bgi);
+ if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
+ block_group_err(fs_info, leaf, slot,
+"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
+ flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
+ hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
+ return -EUCLEAN;
+ }
+
+ type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
+ if (type != BTRFS_BLOCK_GROUP_DATA &&
+ type != BTRFS_BLOCK_GROUP_METADATA &&
+ type != BTRFS_BLOCK_GROUP_SYSTEM &&
+ type != (BTRFS_BLOCK_GROUP_METADATA |
+ BTRFS_BLOCK_GROUP_DATA)) {
+ block_group_err(fs_info, leaf, slot,
+"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
+ type, hweight64(type),
+ BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
+ BTRFS_BLOCK_GROUP_SYSTEM,
+ BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
+ return -EUCLEAN;
+ }
+ return 0;
+}
+
+/*
+ * Common point to switch the item-specific validation.
+ */
+static int check_leaf_item(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ int ret = 0;
+
+ switch (key->type) {
+ case BTRFS_EXTENT_DATA_KEY:
+ ret = check_extent_data_item(root, leaf, key, slot);
+ break;
+ case BTRFS_EXTENT_CSUM_KEY:
+ ret = check_csum_item(root, leaf, key, slot);
+ break;
+ case BTRFS_DIR_ITEM_KEY:
+ case BTRFS_DIR_INDEX_KEY:
+ case BTRFS_XATTR_ITEM_KEY:
+ ret = check_dir_item(root, leaf, key, slot);
+ break;
+ case BTRFS_BLOCK_GROUP_ITEM_KEY:
+ ret = check_block_group_item(root->fs_info, leaf, key, slot);
+ break;
+ }
+ return ret;
+}
+
+static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
+ bool check_item_data)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ /* No valid key type is 0, so all key should be larger than this key */
+ struct btrfs_key prev_key = {0, 0, 0};
+ struct btrfs_key key;
+ u32 nritems = btrfs_header_nritems(leaf);
+ int slot;
+
+ if (btrfs_header_level(leaf) != 0) {
+ generic_err(root, leaf, 0,
+ "invalid level for leaf, have %d expect 0",
+ btrfs_header_level(leaf));
+ return -EUCLEAN;
+ }
+
+ /*
+ * Extent buffers from a relocation tree have a owner field that
+ * corresponds to the subvolume tree they are based on. So just from an
+ * extent buffer alone we can not find out what is the id of the
+ * corresponding subvolume tree, so we can not figure out if the extent
+ * buffer corresponds to the root of the relocation tree or not. So
+ * skip this check for relocation trees.
+ */
+ if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
+ u64 owner = btrfs_header_owner(leaf);
+ struct btrfs_root *check_root;
+
+ /* These trees must never be empty */
+ if (owner == BTRFS_ROOT_TREE_OBJECTID ||
+ owner == BTRFS_CHUNK_TREE_OBJECTID ||
+ owner == BTRFS_EXTENT_TREE_OBJECTID ||
+ owner == BTRFS_DEV_TREE_OBJECTID ||
+ owner == BTRFS_FS_TREE_OBJECTID ||
+ owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
+ generic_err(root, leaf, 0,
+ "invalid root, root %llu must never be empty",
+ owner);
+ return -EUCLEAN;
+ }
+ key.objectid = owner;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ check_root = btrfs_get_fs_root(fs_info, &key, false);
+ /*
+ * The only reason we also check NULL here is that during
+ * open_ctree() some roots has not yet been set up.
+ */
+ if (!IS_ERR_OR_NULL(check_root)) {
+ struct extent_buffer *eb;
+
+ eb = btrfs_root_node(check_root);
+ /* if leaf is the root, then it's fine */
+ if (leaf != eb) {
+ CORRUPT("non-root leaf's nritems is 0",
+ leaf, check_root, 0);
+ free_extent_buffer(eb);
+ return -EUCLEAN;
+ }
+ free_extent_buffer(eb);
+ }
+ return 0;
+ }
+
+ if (nritems == 0)
+ return 0;
+
+ /*
+ * Check the following things to make sure this is a good leaf, and
+ * leaf users won't need to bother with similar sanity checks:
+ *
+ * 1) key ordering
+ * 2) item offset and size
+ * No overlap, no hole, all inside the leaf.
+ * 3) item content
+ * If possible, do comprehensive sanity check.
+ * NOTE: All checks must only rely on the item data itself.
+ */
+ for (slot = 0; slot < nritems; slot++) {
+ u32 item_end_expected;
+ int ret;
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
+ /* Make sure the keys are in the right order */
+ if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
+ CORRUPT("bad key order", leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ /*
+ * Make sure the offset and ends are right, remember that the
+ * item data starts at the end of the leaf and grows towards the
+ * front.
+ */
+ if (slot == 0)
+ item_end_expected = BTRFS_LEAF_DATA_SIZE(root);
+ else
+ item_end_expected = btrfs_item_offset_nr(leaf,
+ slot - 1);
+ if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
+ CORRUPT("slot offset bad", leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ /*
+ * Check to make sure that we don't point outside of the leaf,
+ * just in case all the items are consistent to each other, but
+ * all point outside of the leaf.
+ */
+ if (btrfs_item_end_nr(leaf, slot) >
+ BTRFS_LEAF_DATA_SIZE(root)) {
+ CORRUPT("slot end outside of leaf", leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ /* Also check if the item pointer overlaps with btrfs item. */
+ if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
+ btrfs_item_ptr_offset(leaf, slot)) {
+ CORRUPT("slot overlap with its data", leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ if (check_item_data) {
+ /*
+ * Check if the item size and content meet other
+ * criteria
+ */
+ ret = check_leaf_item(root, leaf, &key, slot);
+ if (ret < 0)
+ return ret;
+ }
+
+ prev_key.objectid = key.objectid;
+ prev_key.type = key.type;
+ prev_key.offset = key.offset;
+ }
+
+ return 0;
+}
+
+int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf)
+{
+ return check_leaf(root, leaf, true);
+}
+
+int btrfs_check_leaf_relaxed(struct btrfs_root *root,
+ struct extent_buffer *leaf)
+{
+ return check_leaf(root, leaf, false);
+}
+
+int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
+{
+ unsigned long nr = btrfs_header_nritems(node);
+ struct btrfs_key key, next_key;
+ int slot;
+ int level = btrfs_header_level(node);
+ u64 bytenr;
+ int ret = 0;
+
+ if (level <= 0 || level >= BTRFS_MAX_LEVEL) {
+ generic_err(root, node, 0,
+ "invalid level for node, have %d expect [1, %d]",
+ level, BTRFS_MAX_LEVEL - 1);
+ return -EUCLEAN;
+ }
+ if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) {
+ btrfs_crit(root->fs_info,
+"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%zu]",
+ root->objectid, node->start,
+ nr == 0 ? "small" : "large", nr,
+ BTRFS_NODEPTRS_PER_BLOCK(root));
+ return -EUCLEAN;
+ }
+
+ for (slot = 0; slot < nr - 1; slot++) {
+ bytenr = btrfs_node_blockptr(node, slot);
+ btrfs_node_key_to_cpu(node, &key, slot);
+ btrfs_node_key_to_cpu(node, &next_key, slot + 1);
+
+ if (!bytenr) {
+ generic_err(root, node, slot,
+ "invalid NULL node pointer");
+ ret = -EUCLEAN;
+ goto out;
+ }
+ if (!IS_ALIGNED(bytenr, root->sectorsize)) {
+ generic_err(root, node, slot,
+ "unaligned pointer, have %llu should be aligned to %u",
+ bytenr, root->sectorsize);
+ ret = -EUCLEAN;
+ goto out;
+ }
+
+ if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
+ generic_err(root, node, slot,
+ "bad key order, current (%llu %u %llu) next (%llu %u %llu)",
+ key.objectid, key.type, key.offset,
+ next_key.objectid, next_key.type,
+ next_key.offset);
+ ret = -EUCLEAN;
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h
new file mode 100644
index 000000000000..3d53e8d6fda0
--- /dev/null
+++ b/fs/btrfs/tree-checker.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) Qu Wenruo 2017. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program.
+ */
+
+#ifndef __BTRFS_TREE_CHECKER__
+#define __BTRFS_TREE_CHECKER__
+
+#include "ctree.h"
+#include "extent_io.h"
+
+/*
+ * Comprehensive leaf checker.
+ * Will check not only the item pointers, but also every possible member
+ * in item data.
+ */
+int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf);
+
+/*
+ * Less strict leaf checker.
+ * Will only check item pointers, not reading item data.
+ */
+int btrfs_check_leaf_relaxed(struct btrfs_root *root,
+ struct extent_buffer *leaf);
+int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node);
+
+#endif
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b4d63a9842fa..d1cca19b29d3 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1184,7 +1184,7 @@ again:
struct map_lookup *map;
int i;
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {
u64 end;
@@ -2757,7 +2757,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
free_extent_map(em);
return -EINVAL;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
lock_chunks(root->fs_info->chunk_root);
check_system_chunk(trans, extent_root, map->type);
unlock_chunks(root->fs_info->chunk_root);
@@ -4540,7 +4540,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (type & BTRFS_BLOCK_GROUP_DATA) {
max_stripe_size = 1024 * 1024 * 1024;
- max_chunk_size = 10 * max_stripe_size;
+ max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS(info->chunk_root);
} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
@@ -4731,7 +4731,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
goto error;
}
set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
- em->bdev = (struct block_device *)map;
+ em->map_lookup = map;
em->start = start;
em->len = num_bytes;
em->block_start = 0;
@@ -4826,7 +4826,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
return -EINVAL;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
item_size = btrfs_chunk_item_size(map->num_stripes);
stripe_size = em->orig_block_len;
@@ -4968,7 +4968,7 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
if (!em)
return 1;
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {
if (map->stripes[i].dev->missing) {
miss_ndevs++;
@@ -5048,7 +5048,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
return 1;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
ret = map->num_stripes;
else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
@@ -5091,7 +5091,7 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
BUG_ON(!em);
BUG_ON(em->start > logical || em->start + em->len < logical);
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
len = map->stripe_len * nr_data_stripes(map);
free_extent_map(em);
@@ -5112,7 +5112,7 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
BUG_ON(!em);
BUG_ON(em->start > logical || em->start + em->len < logical);
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
ret = 1;
free_extent_map(em);
@@ -5271,7 +5271,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
return -EINVAL;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
offset = logical - em->start;
stripe_len = map->stripe_len;
@@ -5813,7 +5813,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
free_extent_map(em);
return -EIO;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
length = em->len;
rmap_len = map->stripe_len;
@@ -6208,6 +6208,101 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
return dev;
}
+/* Return -EIO if any error, otherwise return 0. */
+static int btrfs_check_chunk_valid(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk, u64 logical)
+{
+ u64 length;
+ u64 stripe_len;
+ u16 num_stripes;
+ u16 sub_stripes;
+ u64 type;
+ u64 features;
+ bool mixed = false;
+
+ length = btrfs_chunk_length(leaf, chunk);
+ stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+ num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+ sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+ type = btrfs_chunk_type(leaf, chunk);
+
+ if (!num_stripes) {
+ btrfs_err(root->fs_info, "invalid chunk num_stripes: %u",
+ num_stripes);
+ return -EIO;
+ }
+ if (!IS_ALIGNED(logical, root->sectorsize)) {
+ btrfs_err(root->fs_info,
+ "invalid chunk logical %llu", logical);
+ return -EIO;
+ }
+ if (btrfs_chunk_sector_size(leaf, chunk) != root->sectorsize) {
+ btrfs_err(root->fs_info, "invalid chunk sectorsize %u",
+ btrfs_chunk_sector_size(leaf, chunk));
+ return -EIO;
+ }
+ if (!length || !IS_ALIGNED(length, root->sectorsize)) {
+ btrfs_err(root->fs_info,
+ "invalid chunk length %llu", length);
+ return -EIO;
+ }
+ if (!is_power_of_2(stripe_len)) {
+ btrfs_err(root->fs_info, "invalid chunk stripe length: %llu",
+ stripe_len);
+ return -EIO;
+ }
+ if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+ type) {
+ btrfs_err(root->fs_info, "unrecognized chunk type: %llu",
+ ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+ BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+ btrfs_chunk_type(leaf, chunk));
+ return -EIO;
+ }
+
+ if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
+ btrfs_err(root->fs_info, "missing chunk type flag: 0x%llx", type);
+ return -EIO;
+ }
+
+ if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
+ (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
+ btrfs_err(root->fs_info,
+ "system chunk with data or metadata type: 0x%llx", type);
+ return -EIO;
+ }
+
+ features = btrfs_super_incompat_flags(root->fs_info->super_copy);
+ if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
+ mixed = true;
+
+ if (!mixed) {
+ if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
+ (type & BTRFS_BLOCK_GROUP_DATA)) {
+ btrfs_err(root->fs_info,
+ "mixed chunk type in non-mixed mode: 0x%llx", type);
+ return -EIO;
+ }
+ }
+
+ if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
+ (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) ||
+ (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
+ (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
+ (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) ||
+ ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
+ num_stripes != 1)) {
+ btrfs_err(root->fs_info,
+ "invalid num_stripes:sub_stripes %u:%u for profile %llu",
+ num_stripes, sub_stripes,
+ type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+ return -EIO;
+ }
+
+ return 0;
+}
+
static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
struct extent_buffer *leaf,
struct btrfs_chunk *chunk)
@@ -6217,6 +6312,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
struct extent_map *em;
u64 logical;
u64 length;
+ u64 stripe_len;
u64 devid;
u8 uuid[BTRFS_UUID_SIZE];
int num_stripes;
@@ -6225,6 +6321,12 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
logical = key->offset;
length = btrfs_chunk_length(leaf, chunk);
+ stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+ num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+
+ ret = btrfs_check_chunk_valid(root, leaf, chunk, logical);
+ if (ret)
+ return ret;
read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
@@ -6241,7 +6343,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
em = alloc_extent_map();
if (!em)
return -ENOMEM;
- num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
if (!map) {
free_extent_map(em);
@@ -6249,7 +6350,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
}
set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
- em->bdev = (struct block_device *)map;
+ em->map_lookup = map;
em->start = logical;
em->len = length;
em->orig_start = 0;
@@ -6473,6 +6574,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
u32 array_size;
u32 len = 0;
u32 cur_offset;
+ u64 type;
struct btrfs_key key;
ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize);
@@ -6539,6 +6641,15 @@ int btrfs_read_sys_array(struct btrfs_root *root)
break;
}
+ type = btrfs_chunk_type(sb, chunk);
+ if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
+ btrfs_err(root->fs_info,
+ "invalid chunk type %llu in sys_array at offset %u",
+ type, cur_offset);
+ ret = -EIO;
+ break;
+ }
+
len = btrfs_chunk_item_size(num_stripes);
if (cur_offset + len > array_size)
goto out_short_read;
@@ -6948,7 +7059,7 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_root *root,
/* In order to kick the device replace finish process */
lock_chunks(root);
list_for_each_entry(em, &transaction->pending_chunks, list) {
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {
dev = map->stripes[i].dev;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index d5c84f6b1353..3c651df420be 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -24,6 +24,8 @@
#include <linux/btrfs.h>
#include "async-thread.h"
+#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
+
extern struct mutex uuid_mutex;
#define BTRFS_STRIPE_LEN (64 * 1024)
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 5b68cf526887..c05ab2ec0fef 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -963,11 +963,8 @@ error:
void cachefiles_uncache_page(struct fscache_object *_object, struct page *page)
{
struct cachefiles_object *object;
- struct cachefiles_cache *cache;
object = container_of(_object, struct cachefiles_object, fscache);
- cache = container_of(object->fscache.cache,
- struct cachefiles_cache, cache);
_enter("%p,{%lu}", object, page->index);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 0e3de1bb6500..e7b54514d99a 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3243,7 +3243,6 @@ retry:
tcap->cap_id = t_cap_id;
tcap->seq = t_seq - 1;
tcap->issue_seq = t_seq - 1;
- tcap->mseq = t_mseq;
tcap->issued |= issued;
tcap->implemented |= issued;
if (cap == ci->i_auth_cap)
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 4aa7122a8d38..a485d0cdc559 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -611,7 +611,8 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
capsnap->size);
spin_lock(&mdsc->snap_flush_lock);
- list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
+ if (list_empty(&ci->i_snap_flush_item))
+ list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
spin_unlock(&mdsc->snap_flush_lock);
return 1; /* caller may want to ceph_flush_snaps */
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 1eeb4780c3ed..eacf57c24ca9 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -48,6 +48,7 @@
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
+#include "dns_resolve.h"
#include "ntlmssp.h"
#include "nterr.h"
#include "rfc1002pdu.h"
@@ -304,6 +305,53 @@ static int cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data,
const char *devname);
/*
+ * Resolve hostname and set ip addr in tcp ses. Useful for hostnames that may
+ * get their ip addresses changed at some point.
+ *
+ * This should be called with server->srv_mutex held.
+ */
+#ifdef CONFIG_CIFS_DFS_UPCALL
+static int reconn_set_ipaddr(struct TCP_Server_Info *server)
+{
+ int rc;
+ int len;
+ char *unc, *ipaddr = NULL;
+
+ if (!server->hostname)
+ return -EINVAL;
+
+ len = strlen(server->hostname) + 3;
+
+ unc = kmalloc(len, GFP_KERNEL);
+ if (!unc) {
+ cifs_dbg(FYI, "%s: failed to create UNC path\n", __func__);
+ return -ENOMEM;
+ }
+ snprintf(unc, len, "\\\\%s", server->hostname);
+
+ rc = dns_resolve_server_name_to_ip(unc, &ipaddr);
+ kfree(unc);
+
+ if (rc < 0) {
+ cifs_dbg(FYI, "%s: failed to resolve server part of %s to IP: %d\n",
+ __func__, server->hostname, rc);
+ return rc;
+ }
+
+ rc = cifs_convert_address((struct sockaddr *)&server->dstaddr, ipaddr,
+ strlen(ipaddr));
+ kfree(ipaddr);
+
+ return !rc ? -1 : 0;
+}
+#else
+static inline int reconn_set_ipaddr(struct TCP_Server_Info *server)
+{
+ return 0;
+}
+#endif
+
+/*
* cifs tcp session reconnection
*
* mark tcp session as reconnecting so temporarily locked
@@ -400,6 +448,11 @@ cifs_reconnect(struct TCP_Server_Info *server)
rc = generic_ip_connect(server);
if (rc) {
cifs_dbg(FYI, "reconnect error %d\n", rc);
+ rc = reconn_set_ipaddr(server);
+ if (rc) {
+ cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n",
+ __func__, rc);
+ }
mutex_unlock(&server->srv_mutex);
msleep(3000);
} else {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0141aba9eca6..0305e3866216 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1073,14 +1073,18 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
/*
* Accessing maxBuf is racy with cifs_reconnect - need to store value
- * and check it for zero before using.
+ * and check it before using.
*/
max_buf = tcon->ses->server->maxBuf;
- if (!max_buf) {
+ if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
free_xid(xid);
return -EINVAL;
}
+ BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
+ PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
+ PAGE_SIZE);
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
@@ -1404,12 +1408,16 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
/*
* Accessing maxBuf is racy with cifs_reconnect - need to store value
- * and check it for zero before using.
+ * and check it before using.
*/
max_buf = tcon->ses->server->maxBuf;
- if (!max_buf)
+ if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
return -EINVAL;
+ BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
+ PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
+ PAGE_SIZE);
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
@@ -2745,14 +2753,16 @@ cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
* these pages but not on the region from pos to ppos+len-1.
*/
written = cifs_user_writev(iocb, from);
- if (written > 0 && CIFS_CACHE_READ(cinode)) {
+ if (CIFS_CACHE_READ(cinode)) {
/*
- * Windows 7 server can delay breaking level2 oplock if a write
- * request comes - break it on the client to prevent reading
- * an old data.
+ * We have read level caching and we have just sent a write
+ * request to the server thus making data in the cache stale.
+ * Zap the cache and set oplock/lease level to NONE to avoid
+ * reading stale data from the cache. All subsequent read
+ * operations will read new data from the server.
*/
cifs_zap_mapping(inode);
- cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
+ cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
inode);
cinode->oplock = 0;
}
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 57b039ebfb1f..43fa471c88d7 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -652,7 +652,14 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
/* scan and find it */
int i;
char *cur_ent;
- char *end_of_smb = cfile->srch_inf.ntwrk_buf_start +
+ char *end_of_smb;
+
+ if (cfile->srch_inf.ntwrk_buf_start == NULL) {
+ cifs_dbg(VFS, "ntwrk_buf_start is NULL during readdir\n");
+ return -EIO;
+ }
+
+ end_of_smb = cfile->srch_inf.ntwrk_buf_start +
server->ops->calc_smb_size(
cfile->srch_inf.ntwrk_buf_start);
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index b2aff0c6f22c..dee5250701de 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -123,12 +123,14 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
/*
* Accessing maxBuf is racy with cifs_reconnect - need to store value
- * and check it for zero before using.
+ * and check it before using.
*/
max_buf = tcon->ses->server->maxBuf;
- if (!max_buf)
+ if (max_buf < sizeof(struct smb2_lock_element))
return -EINVAL;
+ BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf, PAGE_SIZE);
max_num = max_buf / sizeof(struct smb2_lock_element);
buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf)
@@ -265,6 +267,8 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
return -EINVAL;
}
+ BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf, PAGE_SIZE);
max_num = max_buf / sizeof(struct smb2_lock_element);
buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf) {
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 8257a5a97cc0..98c25b969ab8 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -377,8 +377,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_NONEXISTENT_EA_ENTRY, -EIO, "STATUS_NONEXISTENT_EA_ENTRY"},
{STATUS_NO_EAS_ON_FILE, -ENODATA, "STATUS_NO_EAS_ON_FILE"},
{STATUS_EA_CORRUPT_ERROR, -EIO, "STATUS_EA_CORRUPT_ERROR"},
- {STATUS_FILE_LOCK_CONFLICT, -EIO, "STATUS_FILE_LOCK_CONFLICT"},
- {STATUS_LOCK_NOT_GRANTED, -EIO, "STATUS_LOCK_NOT_GRANTED"},
+ {STATUS_FILE_LOCK_CONFLICT, -EACCES, "STATUS_FILE_LOCK_CONFLICT"},
+ {STATUS_LOCK_NOT_GRANTED, -EACCES, "STATUS_LOCK_NOT_GRANTED"},
{STATUS_DELETE_PENDING, -ENOENT, "STATUS_DELETE_PENDING"},
{STATUS_CTL_FILE_NOT_SUPPORTED, -ENOSYS,
"STATUS_CTL_FILE_NOT_SUPPORTED"},
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 2725085a3f9f..eae3cdffaf7f 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -143,14 +143,14 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
scredits = server->credits;
/* can deadlock with reopen */
- if (scredits == 1) {
+ if (scredits <= 8) {
*num = SMB2_MAX_BUFFER_SIZE;
*credits = 0;
break;
}
- /* leave one credit for a possible reopen */
- scredits--;
+ /* leave some credits for reopen and other ops */
+ scredits -= 8;
*num = min_t(unsigned int, size,
scredits * SMB2_MAX_BUFFER_SIZE);
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index f7111bb88ec1..5e21d58c49ef 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2523,8 +2523,8 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
if (rc == -ENODATA && rsp->hdr.Status == STATUS_NO_MORE_FILES) {
srch_inf->endOfSearch = true;
rc = 0;
- }
- cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE);
+ } else
+ cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE);
goto qdir_exit;
}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index aacb15bd56fe..f087158c5555 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -82,8 +82,8 @@
#define NUMBER_OF_SMB2_COMMANDS 0x0013
-/* 4 len + 52 transform hdr + 64 hdr + 56 create rsp */
-#define MAX_SMB2_HDR_SIZE 0x00b0
+/* 52 transform hdr + 64 hdr + 88 create rsp */
+#define MAX_SMB2_HDR_SIZE 204
#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 54af10204e83..1cf0a336ec06 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -360,7 +360,7 @@ uncork:
if (rc < 0 && rc != -EINTR)
cifs_dbg(VFS, "Error %d sending data on socket to server\n",
rc);
- else
+ else if (rc > 0)
rc = 0;
return rc;
diff --git a/fs/dcache.c b/fs/dcache.c
index 141651b0c766..9ffe60702299 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1155,15 +1155,11 @@ static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
*/
void shrink_dcache_sb(struct super_block *sb)
{
- long freed;
-
do {
LIST_HEAD(dispose);
- freed = list_lru_walk(&sb->s_dentry_lru,
+ list_lru_walk(&sb->s_dentry_lru,
dentry_lru_isolate_shrink, &dispose, 1024);
-
- this_cpu_sub(nr_dentry_unused, freed);
shrink_dentry_list(&dispose);
cond_resched();
} while (list_lru_count(&sb->s_dentry_lru) > 0);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index e49ba072bd64..22fe11baef2b 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -671,6 +671,13 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
struct dentry *dentry = NULL, *trap;
struct name_snapshot old_name;
+ if (IS_ERR(old_dir))
+ return old_dir;
+ if (IS_ERR(new_dir))
+ return new_dir;
+ if (IS_ERR_OR_NULL(old_dentry))
+ return old_dentry;
+
trap = lock_rename(new_dir, old_dir);
/* Source or destination directories don't exist? */
if (d_really_is_negative(old_dir) || d_really_is_negative(new_dir))
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index dcea1e37a1b7..f18619bc2e09 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -290,6 +290,8 @@ void dlm_callback_suspend(struct dlm_ls *ls)
flush_workqueue(ls->ls_callback_wq);
}
+#define MAX_CB_QUEUE 25
+
void dlm_callback_resume(struct dlm_ls *ls)
{
struct dlm_lkb *lkb, *safe;
@@ -300,15 +302,23 @@ void dlm_callback_resume(struct dlm_ls *ls)
if (!ls->ls_callback_wq)
return;
+more:
mutex_lock(&ls->ls_cb_mutex);
list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) {
list_del_init(&lkb->lkb_cb_list);
queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
count++;
+ if (count == MAX_CB_QUEUE)
+ break;
}
mutex_unlock(&ls->ls_cb_mutex);
if (count)
log_rinfo(ls, "dlm_callback_resume %d", count);
+ if (count == MAX_CB_QUEUE) {
+ count = 0;
+ cond_resched();
+ goto more;
+ }
}
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 35502d4046f5..3a7f401e943c 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1210,6 +1210,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
if (rv < 0) {
log_error(ls, "create_lkb idr error %d", rv);
+ dlm_free_lkb(lkb);
return rv;
}
@@ -4177,6 +4178,7 @@ static int receive_convert(struct dlm_ls *ls, struct dlm_message *ms)
(unsigned long long)lkb->lkb_recover_seq,
ms->m_header.h_nodeid, ms->m_lkid);
error = -ENOENT;
+ dlm_put_lkb(lkb);
goto fail;
}
@@ -4230,6 +4232,7 @@ static int receive_unlock(struct dlm_ls *ls, struct dlm_message *ms)
lkb->lkb_id, lkb->lkb_remid,
ms->m_header.h_nodeid, ms->m_lkid);
error = -ENOENT;
+ dlm_put_lkb(lkb);
goto fail;
}
@@ -5792,20 +5795,20 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
goto out;
}
}
-
- /* After ua is attached to lkb it will be freed by dlm_free_lkb().
- When DLM_IFL_USER is set, the dlm knows that this is a userspace
- lock and that lkb_astparam is the dlm_user_args structure. */
-
error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
fake_astfn, ua, fake_bastfn, &args);
- lkb->lkb_flags |= DLM_IFL_USER;
-
if (error) {
+ kfree(ua->lksb.sb_lvbptr);
+ ua->lksb.sb_lvbptr = NULL;
+ kfree(ua);
__put_lkb(ls, lkb);
goto out;
}
+ /* After ua is attached to lkb it will be freed by dlm_free_lkb().
+ When DLM_IFL_USER is set, the dlm knows that this is a userspace
+ lock and that lkb_astparam is the dlm_user_args structure. */
+ lkb->lkb_flags |= DLM_IFL_USER;
error = request_lock(ls, lkb, name, namelen, &args);
switch (error) {
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index f3e72787e7f9..30e4e01db35a 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -673,11 +673,11 @@ static int new_lockspace(const char *name, const char *cluster,
kfree(ls->ls_recover_buf);
out_lkbidr:
idr_destroy(&ls->ls_lkbidr);
+ out_rsbtbl:
for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) {
if (ls->ls_remove_names[i])
kfree(ls->ls_remove_names[i]);
}
- out_rsbtbl:
vfree(ls->ls_rsbtbl);
out_lsfree:
if (do_unreg)
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index d72d52b90433..280460fef066 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -20,8 +20,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
spin_lock(&inode->i_lock);
+ /*
+ * We must skip inodes in unusual state. We may also skip
+ * inodes without pages but we deliberately won't in case
+ * we need to reschedule to avoid softlockups.
+ */
if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
- (inode->i_mapping->nrpages == 0)) {
+ (inode->i_mapping->nrpages == 0 && !need_resched())) {
spin_unlock(&inode->i_lock);
continue;
}
@@ -29,6 +34,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
spin_unlock(&inode->i_lock);
spin_unlock(&sb->s_inode_list_lock);
+ cond_resched();
invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
toput_inode = inode;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1b08556776ce..240d9ceb8d0c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1034,7 +1034,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
* semantics). All the events that happen during that period of time are
* chained in ep->ovflist and requeued later on.
*/
- if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
+ if (ep->ovflist != EP_UNACTIVE_PTR) {
if (epi->next == EP_UNACTIVE_PTR) {
epi->next = ep->ovflist;
ep->ovflist = epi;
diff --git a/fs/exec.c b/fs/exec.c
index 910fc70c4542..3dad755b7048 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -191,6 +191,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
{
struct page *page;
int ret;
+ unsigned int gup_flags = FOLL_FORCE;
#ifdef CONFIG_STACK_GROWSUP
if (write) {
@@ -199,8 +200,12 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
return NULL;
}
#endif
- ret = get_user_pages(current, bprm->mm, pos,
- 1, write, 1, &page, NULL);
+
+ if (write)
+ gup_flags |= FOLL_WRITE;
+
+ ret = get_user_pages(current, bprm->mm, pos, 1, gup_flags,
+ &page, NULL);
if (ret <= 0)
return NULL;
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 714cd37a6ba3..6599c6124552 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -76,7 +76,7 @@ static bool dentry_connected(struct dentry *dentry)
struct dentry *parent = dget_parent(dentry);
dput(dentry);
- if (IS_ROOT(dentry)) {
+ if (dentry == parent) {
dput(parent);
return false;
}
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 748d35afc902..860024392969 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -721,7 +721,8 @@ static loff_t ext2_max_size(int bits)
{
loff_t res = EXT2_NDIR_BLOCKS;
int meta_blocks;
- loff_t upper_limit;
+ unsigned int upper_limit;
+ unsigned int ppb = 1 << (bits-2);
/* This is calculated to be the largest file size for a
* dense, file such that the total number of
@@ -735,24 +736,34 @@ static loff_t ext2_max_size(int bits)
/* total blocks in file system block size */
upper_limit >>= (bits - 9);
+ /* Compute how many blocks we can address by block tree */
+ res += 1LL << (bits-2);
+ res += 1LL << (2*(bits-2));
+ res += 1LL << (3*(bits-2));
+ /* Does block tree limit file size? */
+ if (res < upper_limit)
+ goto check_lfs;
+ res = upper_limit;
+ /* How many metadata blocks are needed for addressing upper_limit? */
+ upper_limit -= EXT2_NDIR_BLOCKS;
/* indirect blocks */
meta_blocks = 1;
+ upper_limit -= ppb;
/* double indirect blocks */
- meta_blocks += 1 + (1LL << (bits-2));
- /* tripple indirect blocks */
- meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
-
- upper_limit -= meta_blocks;
- upper_limit <<= bits;
-
- res += 1LL << (bits-2);
- res += 1LL << (2*(bits-2));
- res += 1LL << (3*(bits-2));
+ if (upper_limit < ppb * ppb) {
+ meta_blocks += 1 + DIV_ROUND_UP(upper_limit, ppb);
+ res -= meta_blocks;
+ goto check_lfs;
+ }
+ meta_blocks += 1 + ppb;
+ upper_limit -= ppb * ppb;
+ /* tripple indirect blocks for the rest */
+ meta_blocks += 1 + DIV_ROUND_UP(upper_limit, ppb) +
+ DIV_ROUND_UP(upper_limit, ppb*ppb);
+ res -= meta_blocks;
+check_lfs:
res <<= bits;
- if (res > upper_limit)
- res = upper_limit;
-
if (res > MAX_LFS_FILESIZE)
res = MAX_LFS_FILESIZE;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 1aec46733ef8..0dcd33f62637 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -701,8 +701,11 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
if (!PageUptodate(page)) {
ret = ext4_read_inline_page(inode, page);
- if (ret < 0)
+ if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
goto out_up_read;
+ }
}
ret = 1;
@@ -1858,12 +1861,12 @@ int ext4_inline_data_fiemap(struct inode *inode,
physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data;
physical += offsetof(struct ext4_inode, i_block);
- if (physical)
- error = fiemap_fill_next_extent(fieinfo, start, physical,
- inline_len, flags);
brelse(iloc.bh);
out:
up_read(&EXT4_I(inode)->xattr_sem);
+ if (physical)
+ error = fiemap_fill_next_extent(fieinfo, start, physical,
+ inline_len, flags);
return (error < 0 ? error : 0);
}
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index bad13f049fb0..4bd12247a9be 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1600,7 +1600,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
}
if (reserved_gdb || gdb_off == 0) {
- if (ext4_has_feature_resize_inode(sb) ||
+ if (!ext4_has_feature_resize_inode(sb) ||
!le16_to_cpu(es->s_reserved_gdt_blocks)) {
ext4_warning(sb,
"No reserved GDT blocks, can't resize");
@@ -1928,7 +1928,8 @@ retry:
le16_to_cpu(es->s_reserved_gdt_blocks);
n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb);
n_blocks_count = (ext4_fsblk_t)n_group *
- EXT4_BLOCKS_PER_GROUP(sb);
+ EXT4_BLOCKS_PER_GROUP(sb) +
+ le32_to_cpu(es->s_first_data_block);
n_group--; /* set to last group number */
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index cd9cd581fd92..6a7df72cb3da 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1049,6 +1049,16 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
ext4_nfs_get_inode);
}
+static int ext4_nfs_commit_metadata(struct inode *inode)
+{
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL
+ };
+
+ trace_ext4_nfs_commit_metadata(inode);
+ return ext4_write_inode(inode, &wbc);
+}
+
/*
* Try to release metadata pages (indirect blocks, directories) which are
* mapped via the block device. Since these pages could have journal heads
@@ -1143,6 +1153,7 @@ static const struct export_operations ext4_export_ops = {
.fh_to_dentry = ext4_fh_to_dentry,
.fh_to_parent = ext4_fh_to_parent,
.get_parent = ext4_get_parent,
+ .commit_metadata = ext4_nfs_commit_metadata,
};
enum {
@@ -5184,9 +5195,9 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
qf_inode->i_flags |= S_NOQUOTA;
lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
err = dquot_enable(qf_inode, type, format_id, flags);
- iput(qf_inode);
if (err)
lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
+ iput(qf_inode);
return err;
}
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 83dcf7bfd7b8..f0ea91925343 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -350,12 +350,14 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode,
return PTR_ERR(p);
clone = f2fs_acl_clone(p, GFP_NOFS);
- if (!clone)
- goto no_mem;
+ if (!clone) {
+ ret = -ENOMEM;
+ goto release_acl;
+ }
ret = f2fs_acl_create_masq(clone, mode);
if (ret < 0)
- goto no_mem_clone;
+ goto release_clone;
if (ret == 0)
posix_acl_release(clone);
@@ -369,11 +371,11 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode,
return 0;
-no_mem_clone:
+release_clone:
posix_acl_release(clone);
-no_mem:
+release_acl:
posix_acl_release(p);
- return -ENOMEM;
+ return ret;
}
int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f661d80474be..4b2f609f376d 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -58,6 +58,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
.rw = READ_SYNC | REQ_META | REQ_PRIO,
.blk_addr = index,
.encrypted_page = NULL,
+ .is_meta = is_meta,
};
if (unlikely(!is_meta))
@@ -74,8 +75,10 @@ repeat:
fio.page = page;
if (f2fs_submit_page_bio(&fio)) {
- f2fs_put_page(page, 1);
- goto repeat;
+ memset(page_address(page), 0, PAGE_SIZE);
+ f2fs_stop_checkpoint(sbi);
+ f2fs_bug_on(sbi, 1);
+ return page;
}
lock_page(page);
@@ -106,7 +109,8 @@ struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
return __get_meta_page(sbi, index, false);
}
-bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
{
switch (type) {
case META_NAT:
@@ -126,8 +130,20 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
return false;
break;
case META_POR:
+ case DATA_GENERIC:
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
- blkaddr < MAIN_BLKADDR(sbi)))
+ blkaddr < MAIN_BLKADDR(sbi))) {
+ if (type == DATA_GENERIC) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "access invalid blkaddr:%u", blkaddr);
+ WARN_ON(1);
+ }
+ return false;
+ }
+ break;
+ case META_GENERIC:
+ if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
+ blkaddr >= MAIN_BLKADDR(sbi)))
return false;
break;
default:
@@ -151,6 +167,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
.type = META,
.rw = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : READA,
.encrypted_page = NULL,
+ .is_meta = (type != META_POR),
};
if (unlikely(type == META_POR))
@@ -158,7 +175,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
for (; nrpages-- > 0; blkno++) {
- if (!is_valid_blkaddr(sbi, blkno, type))
+ if (!f2fs_is_valid_blkaddr(sbi, blkno, type))
goto out;
switch (type) {
@@ -601,54 +618,73 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
}
}
-static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
- block_t cp_addr, unsigned long long *version)
+static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
+ struct f2fs_checkpoint **cp_block, struct page **cp_page,
+ unsigned long long *version)
{
- struct page *cp_page_1, *cp_page_2 = NULL;
unsigned long blk_size = sbi->blocksize;
- struct f2fs_checkpoint *cp_block;
- unsigned long long cur_version = 0, pre_version = 0;
- size_t crc_offset;
+ size_t crc_offset = 0;
__u32 crc = 0;
- /* Read the 1st cp block in this CP pack */
- cp_page_1 = get_meta_page(sbi, cp_addr);
+ *cp_page = get_meta_page(sbi, cp_addr);
+ *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
- /* get the version number */
- cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1);
- crc_offset = le32_to_cpu(cp_block->checksum_offset);
- if (crc_offset >= blk_size)
- goto invalid_cp1;
-
- crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
- if (!f2fs_crc_valid(crc, cp_block, crc_offset))
- goto invalid_cp1;
+ crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
+ if (crc_offset >= blk_size) {
+ f2fs_put_page(*cp_page, 1);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "invalid crc_offset: %zu", crc_offset);
+ return -EINVAL;
+ }
- pre_version = cur_cp_version(cp_block);
+ crc = le32_to_cpu(*((__le32 *)((unsigned char *)*cp_block
+ + crc_offset)));
+ if (!f2fs_crc_valid(crc, *cp_block, crc_offset)) {
+ f2fs_put_page(*cp_page, 1);
+ f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
+ return -EINVAL;
+ }
- /* Read the 2nd cp block in this CP pack */
- cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
- cp_page_2 = get_meta_page(sbi, cp_addr);
+ *version = cur_cp_version(*cp_block);
+ return 0;
+}
- cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
- crc_offset = le32_to_cpu(cp_block->checksum_offset);
- if (crc_offset >= blk_size)
- goto invalid_cp2;
+static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
+ block_t cp_addr, unsigned long long *version)
+{
+ struct page *cp_page_1 = NULL, *cp_page_2 = NULL;
+ struct f2fs_checkpoint *cp_block = NULL;
+ unsigned long long cur_version = 0, pre_version = 0;
+ int err;
- crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
- if (!f2fs_crc_valid(crc, cp_block, crc_offset))
- goto invalid_cp2;
+ err = get_checkpoint_version(sbi, cp_addr, &cp_block,
+ &cp_page_1, version);
+ if (err)
+ return NULL;
+
+ if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
+ sbi->blocks_per_seg) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "invalid cp_pack_total_block_count:%u",
+ le32_to_cpu(cp_block->cp_pack_total_block_count));
+ goto invalid_cp;
+ }
+ pre_version = *version;
- cur_version = cur_cp_version(cp_block);
+ cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
+ err = get_checkpoint_version(sbi, cp_addr, &cp_block,
+ &cp_page_2, version);
+ if (err)
+ goto invalid_cp;
+ cur_version = *version;
if (cur_version == pre_version) {
*version = cur_version;
f2fs_put_page(cp_page_2, 1);
return cp_page_1;
}
-invalid_cp2:
f2fs_put_page(cp_page_2, 1);
-invalid_cp1:
+invalid_cp:
f2fs_put_page(cp_page_1, 1);
return NULL;
}
@@ -696,6 +732,15 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
memcpy(sbi->ckpt, cp_block, blk_size);
+ if (cur_page == cp1)
+ sbi->cur_cp_pack = 1;
+ else
+ sbi->cur_cp_pack = 2;
+
+ /* Sanity checking of checkpoint */
+ if (sanity_check_ckpt(sbi))
+ goto free_fail_no_cp;
+
if (cp_blks <= 1)
goto done;
@@ -717,6 +762,9 @@ done:
f2fs_put_page(cp2, 1);
return 0;
+free_fail_no_cp:
+ f2fs_put_page(cp1, 1);
+ f2fs_put_page(cp2, 1);
fail_no_cp:
kfree(sbi->ckpt);
return -EINVAL;
@@ -767,24 +815,6 @@ out:
f2fs_trace_pid(page);
}
-void add_dirty_dir_inode(struct inode *inode)
-{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct inode_entry *new =
- f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
- int ret = 0;
-
- new->inode = inode;
- INIT_LIST_HEAD(&new->list);
-
- spin_lock(&sbi->dir_inode_lock);
- ret = __add_dirty_inode(inode, new);
- spin_unlock(&sbi->dir_inode_lock);
-
- if (ret)
- kmem_cache_free(inode_entry_slab, new);
-}
-
void remove_dirty_dir_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -807,12 +837,6 @@ void remove_dirty_dir_inode(struct inode *inode)
stat_dec_dirty_dir(sbi);
spin_unlock(&sbi->dir_inode_lock);
kmem_cache_free(inode_entry_slab, entry);
-
- /* Only from the recovery routine */
- if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
- clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
- iput(inode);
- }
}
void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
@@ -922,7 +946,6 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
- struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
nid_t last_nid = nm_i->next_scan_nid;
@@ -931,15 +954,6 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
__u32 crc32 = 0;
int i;
int cp_payload_blks = __cp_payload(sbi);
- block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg);
- bool invalidate = false;
-
- /*
- * This avoids to conduct wrong roll-forward operations and uses
- * metapages, so should be called prior to sync_meta_pages below.
- */
- if (discard_next_dnode(sbi, discard_blk))
- invalidate = true;
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META)) {
@@ -1016,6 +1030,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
set_ckpt_flags(ckpt, CP_FSCK_FLAG);
+ /* set this flag to activate crc|cp_ver for recovery */
+ set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
+
/* update SIT/NAT bitmap */
get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
@@ -1025,7 +1042,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
le32_to_cpu(ckpt->checksum_offset)))
= cpu_to_le32(crc32);
- start_blk = __start_cp_addr(sbi);
+ start_blk = __start_cp_next_addr(sbi);
/* need to wait for end_io results */
wait_on_all_pages_writeback(sbi);
@@ -1073,14 +1090,6 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* wait for previous submitted meta pages writeback */
wait_on_all_pages_writeback(sbi);
- /*
- * invalidate meta page which is used temporarily for zeroing out
- * block at the end of warm node chain.
- */
- if (invalidate)
- invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
- discard_blk);
-
release_dirty_inode(sbi);
if (unlikely(f2fs_cp_error(sbi)))
@@ -1088,6 +1097,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
clear_prefree_segments(sbi, cpc);
clear_sbi_flag(sbi, SBI_IS_DIRTY);
+ __set_cp_next_pack(sbi);
}
/*
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index f6ccb21f286b..2b0b671484bd 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -147,6 +147,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
struct bio *bio;
struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page;
+ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->blk_addr,
+ __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
+ return -EFAULT;
+
trace_f2fs_submit_page_bio(page, fio);
f2fs_trace_ios(fio, 0);
@@ -172,7 +176,7 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
io = is_read ? &sbi->read_io : &sbi->write_io[btype];
- verify_block_addr(sbi, fio->blk_addr);
+ verify_block_addr(fio, fio->blk_addr);
down_write(&io->io_rwsem);
@@ -603,7 +607,13 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
goto unlock_out;
}
- if (dn.data_blkaddr == NEW_ADDR || dn.data_blkaddr == NULL_ADDR) {
+ if (__is_valid_data_blkaddr(dn.data_blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC)) {
+ err = -EFAULT;
+ goto sync_out;
+ }
+
+ if (!is_valid_data_blkaddr(sbi, dn.data_blkaddr)) {
if (create) {
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
@@ -866,6 +876,40 @@ out:
return ret;
}
+struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
+ unsigned nr_pages)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_crypto_ctx *ctx = NULL;
+ struct block_device *bdev = sbi->sb->s_bdev;
+ struct bio *bio;
+
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
+ return ERR_PTR(-EFAULT);
+
+ if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
+ ctx = f2fs_get_crypto_ctx(inode);
+ if (IS_ERR(ctx))
+ return ERR_CAST(ctx);
+
+ /* wait the page to be moved by cleaning */
+ f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr);
+ }
+
+ bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES));
+ if (!bio) {
+ if (ctx)
+ f2fs_release_crypto_ctx(ctx);
+ return ERR_PTR(-ENOMEM);
+ }
+ bio->bi_bdev = bdev;
+ bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blkaddr);
+ bio->bi_end_io = f2fs_read_end_io;
+ bio->bi_private = ctx;
+
+ return bio;
+}
+
/*
* This function was originally taken from fs/mpage.c, and customized for f2fs.
* Major change was from block_size == page_size in f2fs by default.
@@ -884,7 +928,6 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
sector_t last_block;
sector_t last_block_in_file;
sector_t block_nr;
- struct block_device *bdev = inode->i_sb->s_bdev;
struct f2fs_map_blocks map;
map.m_pblk = 0;
@@ -941,6 +984,10 @@ got_it:
SetPageUptodate(page);
goto confused;
}
+
+ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
+ DATA_GENERIC))
+ goto set_error_page;
} else {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
SetPageUptodate(page);
@@ -958,31 +1005,9 @@ submit_and_realloc:
bio = NULL;
}
if (bio == NULL) {
- struct f2fs_crypto_ctx *ctx = NULL;
-
- if (f2fs_encrypted_inode(inode) &&
- S_ISREG(inode->i_mode)) {
-
- ctx = f2fs_get_crypto_ctx(inode);
- if (IS_ERR(ctx))
- goto set_error_page;
-
- /* wait the page to be moved by cleaning */
- f2fs_wait_on_encrypted_page_writeback(
- F2FS_I_SB(inode), block_nr);
- }
-
- bio = bio_alloc(GFP_KERNEL,
- min_t(int, nr_pages, BIO_MAX_PAGES));
- if (!bio) {
- if (ctx)
- f2fs_release_crypto_ctx(ctx);
+ bio = f2fs_grab_bio(inode, block_nr, nr_pages);
+ if (IS_ERR(bio))
goto set_error_page;
- }
- bio->bi_bdev = bdev;
- bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr);
- bio->bi_end_io = f2fs_read_end_io;
- bio->bi_private = ctx;
}
if (bio_add_page(bio, page, blocksize, 0) < blocksize)
@@ -1077,11 +1102,17 @@ int do_write_data_page(struct f2fs_io_info *fio)
set_page_writeback(page);
+ if (__is_valid_data_blkaddr(fio->blk_addr) &&
+ !f2fs_is_valid_blkaddr(fio->sbi, fio->blk_addr,
+ DATA_GENERIC)) {
+ err = -EFAULT;
+ goto out_writepage;
+ }
/*
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
- if (unlikely(fio->blk_addr != NEW_ADDR &&
+ if (unlikely(is_valid_data_blkaddr(fio->sbi, fio->blk_addr) &&
!is_cold_data(page) &&
need_inplace_update(inode))) {
rewrite_data_page(fio);
@@ -1482,17 +1513,21 @@ put_next:
if (dn.data_blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
} else {
- struct f2fs_io_info fio = {
- .sbi = sbi,
- .type = DATA,
- .rw = READ_SYNC,
- .blk_addr = dn.data_blkaddr,
- .page = page,
- .encrypted_page = NULL,
- };
- err = f2fs_submit_page_bio(&fio);
- if (err)
+ struct bio *bio;
+
+ bio = f2fs_grab_bio(inode, dn.data_blkaddr, 1);
+ if (IS_ERR(bio)) {
+ err = PTR_ERR(bio);
goto fail;
+ }
+
+ if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
+ bio_put(bio);
+ err = -EFAULT;
+ goto fail;
+ }
+
+ submit_bio(READ_SYNC, bio);
lock_page(page);
if (unlikely(!PageUptodate(page))) {
@@ -1503,13 +1538,6 @@ put_next:
f2fs_put_page(page, 1);
goto repeat;
}
-
- /* avoid symlink page */
- if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
- err = f2fs_decrypt_one(inode, page);
- if (err)
- goto fail;
- }
}
out_update:
SetPageUptodate(page);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 60972a559685..92a240616f52 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -48,7 +48,6 @@ unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
[F2FS_FT_SYMLINK] = DT_LNK,
};
-#define S_SHIFT 12
static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
[S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE,
[S_IFDIR >> S_SHIFT] = F2FS_FT_DIR,
@@ -64,6 +63,13 @@ void set_de_type(struct f2fs_dir_entry *de, umode_t mode)
de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
}
+unsigned char get_de_type(struct f2fs_dir_entry *de)
+{
+ if (de->file_type < F2FS_FT_MAX)
+ return f2fs_filetype_table[de->file_type];
+ return DT_UNKNOWN;
+}
+
static unsigned long dir_block_index(unsigned int level,
int dir_level, unsigned int idx)
{
@@ -519,11 +525,7 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
test_and_set_bit_le(bit_pos + i, (void *)d->bitmap);
}
-/*
- * Caller should grab and release a rwsem by calling f2fs_lock_op() and
- * f2fs_unlock_op().
- */
-int __f2fs_add_link(struct inode *dir, const struct qstr *name,
+int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
struct inode *inode, nid_t ino, umode_t mode)
{
unsigned int bit_pos;
@@ -536,28 +538,11 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
struct f2fs_dentry_block *dentry_blk = NULL;
struct f2fs_dentry_ptr d;
struct page *page = NULL;
- struct f2fs_filename fname;
- struct qstr new_name;
- int slots, err;
-
- err = f2fs_fname_setup_filename(dir, name, 0, &fname);
- if (err)
- return err;
-
- new_name.name = fname_name(&fname);
- new_name.len = fname_len(&fname);
-
- if (f2fs_has_inline_dentry(dir)) {
- err = f2fs_add_inline_entry(dir, &new_name, inode, ino, mode);
- if (!err || err != -EAGAIN)
- goto out;
- else
- err = 0;
- }
+ int slots, err = 0;
level = 0;
- slots = GET_DENTRY_SLOTS(new_name.len);
- dentry_hash = f2fs_dentry_hash(&new_name, NULL);
+ slots = GET_DENTRY_SLOTS(new_name->len);
+ dentry_hash = f2fs_dentry_hash(new_name, NULL);
current_depth = F2FS_I(dir)->i_current_depth;
if (F2FS_I(dir)->chash == dentry_hash) {
@@ -566,10 +551,8 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
}
start:
- if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) {
- err = -ENOSPC;
- goto out;
- }
+ if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
+ return -ENOSPC;
/* Increase the depth, if required */
if (level == current_depth)
@@ -583,10 +566,8 @@ start:
for (block = bidx; block <= (bidx + nblock - 1); block++) {
dentry_page = get_new_data_page(dir, NULL, block, true);
- if (IS_ERR(dentry_page)) {
- err = PTR_ERR(dentry_page);
- goto out;
- }
+ if (IS_ERR(dentry_page))
+ return PTR_ERR(dentry_page);
dentry_blk = kmap(dentry_page);
bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
@@ -606,7 +587,7 @@ add_dentry:
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
- page = init_inode_metadata(inode, dir, &new_name, NULL);
+ page = init_inode_metadata(inode, dir, new_name, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
@@ -616,7 +597,7 @@ add_dentry:
}
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
- f2fs_update_dentry(ino, mode, &d, &new_name, dentry_hash, bit_pos);
+ f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos);
set_page_dirty(dentry_page);
@@ -638,7 +619,34 @@ fail:
}
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
-out:
+
+ return err;
+}
+
+/*
+ * Caller should grab and release a rwsem by calling f2fs_lock_op() and
+ * f2fs_unlock_op().
+ */
+int __f2fs_add_link(struct inode *dir, const struct qstr *name,
+ struct inode *inode, nid_t ino, umode_t mode)
+{
+ struct f2fs_filename fname;
+ struct qstr new_name;
+ int err;
+
+ err = f2fs_fname_setup_filename(dir, name, 0, &fname);
+ if (err)
+ return err;
+
+ new_name.name = fname_name(&fname);
+ new_name.len = fname_len(&fname);
+
+ err = -EAGAIN;
+ if (f2fs_has_inline_dentry(dir))
+ err = f2fs_add_inline_entry(dir, &new_name, inode, ino, mode);
+ if (err == -EAGAIN)
+ err = f2fs_add_regular_entry(dir, &new_name, inode, ino, mode);
+
f2fs_fname_free_filename(&fname);
return err;
}
@@ -792,10 +800,7 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
break;
de = &d->dentry[bit_pos];
- if (de->file_type < F2FS_FT_MAX)
- d_type = f2fs_filetype_table[de->file_type];
- else
- d_type = DT_UNKNOWN;
+ d_type = get_de_type(de);
de_name.name = d->filename[bit_pos];
de_name.len = le16_to_cpu(de->name_len);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 2871576fbca4..2bfce887dce2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -135,7 +135,7 @@ struct cp_control {
};
/*
- * For CP/NAT/SIT/SSA readahead
+ * indicate meta/data type
*/
enum {
META_CP,
@@ -143,6 +143,8 @@ enum {
META_SIT,
META_SSA,
META_POR,
+ DATA_GENERIC,
+ META_GENERIC,
};
/* for the list of ino */
@@ -684,6 +686,7 @@ struct f2fs_io_info {
block_t blk_addr; /* block address to be written */
struct page *page; /* page to be written */
struct page *encrypted_page; /* encrypted page */
+ bool is_meta; /* indicate borrow meta inode mapping or not */
};
#define is_read_io(rw) (((rw) & 1) == READ)
@@ -731,6 +734,7 @@ struct f2fs_sb_info {
/* for checkpoint */
struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
+ int cur_cp_pack; /* remain current cp pack */
struct inode *meta_inode; /* cache meta blocks */
struct mutex cp_mutex; /* checkpoint procedure lock */
struct rw_semaphore cp_rwsem; /* blocking FS operations */
@@ -1140,22 +1144,27 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
{
- block_t start_addr;
- struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
- unsigned long long ckpt_version = cur_cp_version(ckpt);
-
- start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
+ block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
- /*
- * odd numbered checkpoint should at cp segment 0
- * and even segment must be at cp segment 1
- */
- if (!(ckpt_version & 1))
+ if (sbi->cur_cp_pack == 2)
start_addr += sbi->blocks_per_seg;
+ return start_addr;
+}
+
+static inline block_t __start_cp_next_addr(struct f2fs_sb_info *sbi)
+{
+ block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
+ if (sbi->cur_cp_pack == 1)
+ start_addr += sbi->blocks_per_seg;
return start_addr;
}
+static inline void __set_cp_next_pack(struct f2fs_sb_info *sbi)
+{
+ sbi->cur_cp_pack = (sbi->cur_cp_pack == 1) ? 2 : 1;
+}
+
static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
{
return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum);
@@ -1402,7 +1411,6 @@ enum {
FI_NO_ALLOC, /* should not allocate any blocks */
FI_FREE_NID, /* free allocated nide */
FI_UPDATE_DIR, /* should update inode block for consistency */
- FI_DELAY_IPUT, /* used for the recovery */
FI_NO_EXTENT, /* not to use the extent cache */
FI_INLINE_XATTR, /* used for inline xattr */
FI_INLINE_DATA, /* used for inline data*/
@@ -1641,6 +1649,39 @@ static inline void *f2fs_kvzalloc(size_t size, gfp_t flags)
(pgofs - ADDRS_PER_INODE(fi) + ADDRS_PER_BLOCK) / \
ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi))
+#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \
+ (!is_read_io(fio->rw) || fio->is_meta))
+
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
+void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
+static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "invalid blkaddr: %u, type: %d, run fsck to fix.",
+ blkaddr, type);
+ f2fs_bug_on(sbi, 1);
+ }
+}
+
+static inline bool __is_valid_data_blkaddr(block_t blkaddr)
+{
+ if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
+ return false;
+ return true;
+}
+
+static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr)
+{
+ if (!__is_valid_data_blkaddr(blkaddr))
+ return false;
+ verify_blkaddr(sbi, blkaddr, DATA_GENERIC);
+ return true;
+}
+
/*
* file.c
*/
@@ -1677,7 +1718,7 @@ struct dentry *f2fs_get_parent(struct dentry *child);
*/
extern unsigned char f2fs_filetype_table[F2FS_FT_MAX];
void set_de_type(struct f2fs_dir_entry *, umode_t);
-
+unsigned char get_de_type(struct f2fs_dir_entry *);
struct f2fs_dir_entry *find_target_dentry(struct f2fs_filename *,
f2fs_hash_t, int *, struct f2fs_dentry_ptr *);
bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
@@ -1698,6 +1739,8 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
int update_dent_inode(struct inode *, struct inode *, const struct qstr *);
void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *,
const struct qstr *, f2fs_hash_t , unsigned int);
+int f2fs_add_regular_entry(struct inode *, const struct qstr *,
+ struct inode *, nid_t, umode_t);
int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t,
umode_t);
void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *,
@@ -1718,6 +1761,7 @@ int f2fs_commit_super(struct f2fs_sb_info *, bool);
int f2fs_sync_fs(struct super_block *, int);
extern __printf(3, 4)
void f2fs_msg(struct super_block *, const char *, const char *, ...);
+int sanity_check_ckpt(struct f2fs_sb_info *sbi);
/*
* hash.c
@@ -1778,7 +1822,6 @@ bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
void release_discard_addrs(struct f2fs_sb_info *);
-bool discard_next_dnode(struct f2fs_sb_info *, block_t);
int npages_for_summary_flush(struct f2fs_sb_info *, bool);
void allocate_new_segments(struct f2fs_sb_info *);
int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
@@ -1810,7 +1853,8 @@ void destroy_segment_manager_caches(void);
struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t);
-bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int);
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool);
void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
@@ -1825,7 +1869,6 @@ void remove_orphan_inode(struct f2fs_sb_info *, nid_t);
int recover_orphan_inodes(struct f2fs_sb_info *);
int get_valid_checkpoint(struct f2fs_sb_info *);
void update_dirty_page(struct inode *, struct page *);
-void add_dirty_dir_inode(struct inode *);
void remove_dirty_dir_inode(struct inode *);
void sync_dirty_dir_inodes(struct f2fs_sb_info *);
void write_checkpoint(struct f2fs_sb_info *, struct cp_control *);
@@ -1864,7 +1907,7 @@ void build_gc_manager(struct f2fs_sb_info *);
/*
* recovery.c
*/
-int recover_fsync_data(struct f2fs_sb_info *);
+int recover_fsync_data(struct f2fs_sb_info *, bool);
bool space_for_roll_forward(struct f2fs_sb_info *);
/*
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 01eed94b01ea..bee3bc7a16ac 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -200,6 +200,9 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
trace_f2fs_sync_file_enter(inode);
+ if (S_ISDIR(inode->i_mode))
+ goto go_write;
+
/* if fdatasync is triggered, let's do in-place-update */
if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
set_inode_flag(fi, FI_NEED_IPU);
@@ -305,13 +308,13 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping,
return pgofs;
}
-static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
- int whence)
+static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
+ pgoff_t dirty, pgoff_t pgofs, int whence)
{
switch (whence) {
case SEEK_DATA:
if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
- (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR))
+ is_valid_data_blkaddr(sbi, blkaddr))
return true;
break;
case SEEK_HOLE:
@@ -374,7 +377,15 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
block_t blkaddr;
blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
- if (__found_offset(blkaddr, dirty, pgofs, whence)) {
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
+ blkaddr, DATA_GENERIC)) {
+ f2fs_put_dnode(&dn);
+ goto fail;
+ }
+
+ if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
+ pgofs, whence)) {
f2fs_put_dnode(&dn);
goto found;
}
@@ -466,6 +477,11 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
dn->data_blkaddr = NULL_ADDR;
set_data_blkaddr(dn);
+
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
+ continue;
+
invalidate_blocks(sbi, blkaddr);
if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
clear_inode_flag(F2FS_I(dn->inode),
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index ad80f916b64d..00685a8b1418 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -127,6 +127,16 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
if (err)
return err;
+ if (unlikely(dn->data_blkaddr != NEW_ADDR)) {
+ f2fs_put_dnode(dn);
+ set_sbi_flag(fio.sbi, SBI_NEED_FSCK);
+ f2fs_msg(fio.sbi->sb, KERN_WARNING,
+ "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
+ "run fsck to fix.",
+ __func__, dn->inode->i_ino, dn->data_blkaddr);
+ return -EINVAL;
+ }
+
f2fs_wait_on_page_writeback(page, DATA);
if (PageUptodate(page))
@@ -367,7 +377,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent,
* NOTE: ipage is grabbed by caller, but if any error occurs, we should
* release ipage in this function.
*/
-static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
+static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
struct f2fs_inline_dentry *inline_dentry)
{
struct page *page;
@@ -386,6 +396,17 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
if (err)
goto out;
+ if (unlikely(dn.data_blkaddr != NEW_ADDR)) {
+ f2fs_put_dnode(&dn);
+ set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK);
+ f2fs_msg(F2FS_P_SB(page)->sb, KERN_WARNING,
+ "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
+ "run fsck to fix.",
+ __func__, dir->i_ino, dn.data_blkaddr);
+ err = -EINVAL;
+ goto out;
+ }
+
f2fs_wait_on_page_writeback(page, DATA);
zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
@@ -428,6 +449,98 @@ out:
return err;
}
+static int f2fs_add_inline_entries(struct inode *dir,
+ struct f2fs_inline_dentry *inline_dentry)
+{
+ struct f2fs_dentry_ptr d;
+ unsigned long bit_pos = 0;
+ int err = 0;
+
+ make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2);
+
+ while (bit_pos < d.max) {
+ struct f2fs_dir_entry *de;
+ struct qstr new_name;
+ nid_t ino;
+ umode_t fake_mode;
+
+ if (!test_bit_le(bit_pos, d.bitmap)) {
+ bit_pos++;
+ continue;
+ }
+
+ de = &d.dentry[bit_pos];
+ new_name.name = d.filename[bit_pos];
+ new_name.len = de->name_len;
+
+ ino = le32_to_cpu(de->ino);
+ fake_mode = get_de_type(de) << S_SHIFT;
+
+ err = f2fs_add_regular_entry(dir, &new_name, NULL,
+ ino, fake_mode);
+ if (err)
+ goto punch_dentry_pages;
+
+ if (unlikely(!de->name_len))
+ d.max = -1;
+
+ bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
+ }
+ return 0;
+punch_dentry_pages:
+ truncate_inode_pages(&dir->i_data, 0);
+ truncate_blocks(dir, 0, false);
+ remove_dirty_dir_inode(dir);
+ return err;
+}
+
+static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
+ struct f2fs_inline_dentry *inline_dentry)
+{
+ struct f2fs_inline_dentry *backup_dentry;
+ int err;
+
+ backup_dentry = kmalloc(sizeof(struct f2fs_inline_dentry),
+ GFP_F2FS_ZERO);
+ if (!backup_dentry)
+ return -ENOMEM;
+
+ memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA);
+ truncate_inline_inode(ipage, 0);
+
+ unlock_page(ipage);
+
+ err = f2fs_add_inline_entries(dir, backup_dentry);
+ if (err)
+ goto recover;
+
+ lock_page(ipage);
+
+ stat_dec_inline_dir(dir);
+ clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY);
+ update_inode(dir, ipage);
+ kfree(backup_dentry);
+ return 0;
+recover:
+ lock_page(ipage);
+ memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA);
+ i_size_write(dir, MAX_INLINE_DATA);
+ update_inode(dir, ipage);
+ f2fs_put_page(ipage, 1);
+
+ kfree(backup_dentry);
+ return err;
+}
+
+static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
+ struct f2fs_inline_dentry *inline_dentry)
+{
+ if (!F2FS_I(dir)->i_dir_level)
+ return f2fs_move_inline_dirents(dir, ipage, inline_dentry);
+ else
+ return f2fs_move_rehashed_dirents(dir, ipage, inline_dentry);
+}
+
int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
struct inode *inode, nid_t ino, umode_t mode)
{
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 5528801a5baf..89bf8dd7758c 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -50,13 +50,16 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
}
}
-static bool __written_first_block(struct f2fs_inode *ri)
+static int __written_first_block(struct f2fs_sb_info *sbi,
+ struct f2fs_inode *ri)
{
block_t addr = le32_to_cpu(ri->i_addr[0]);
- if (addr != NEW_ADDR && addr != NULL_ADDR)
- return true;
- return false;
+ if (!__is_valid_data_blkaddr(addr))
+ return 1;
+ if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC))
+ return -EFAULT;
+ return 0;
}
static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -94,12 +97,57 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage)
return;
}
+static bool sanity_check_inode(struct inode *inode, struct page *node_page)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ unsigned long long iblocks;
+
+ iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
+ if (!iblocks) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, "
+ "run fsck to fix.",
+ __func__, inode->i_ino, iblocks);
+ return false;
+ }
+
+ if (ino_of_node(node_page) != nid_of_node(node_page)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: corrupted inode footer i_ino=%lx, ino,nid: "
+ "[%u, %u] run fsck to fix.",
+ __func__, inode->i_ino,
+ ino_of_node(node_page), nid_of_node(node_page));
+ return false;
+ }
+
+ if (F2FS_I(inode)->extent_tree) {
+ struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
+
+ if (ei->len &&
+ (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) ||
+ !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
+ DATA_GENERIC))) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx) extent info [%u, %u, %u] "
+ "is incorrect, run fsck to fix",
+ __func__, inode->i_ino,
+ ei->blk, ei->fofs, ei->len);
+ return false;
+ }
+ }
+ return true;
+}
+
static int do_read_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct page *node_page;
struct f2fs_inode *ri;
+ int err;
/* Check if ino is within scope */
if (check_nid_range(sbi, inode->i_ino)) {
@@ -142,6 +190,11 @@ static int do_read_inode(struct inode *inode)
get_inline_info(fi, ri);
+ if (!sanity_check_inode(inode, node_page)) {
+ f2fs_put_page(node_page, 1);
+ return -EINVAL;
+ }
+
/* check data exist */
if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
__recover_inline_status(inode, node_page);
@@ -149,7 +202,12 @@ static int do_read_inode(struct inode *inode)
/* get rdev by using inline_info */
__get_inode_rdev(inode, ri);
- if (__written_first_block(ri))
+ err = __written_first_block(sbi, ri);
+ if (err < 0) {
+ f2fs_put_page(node_page, 1);
+ return err;
+ }
+ if (!err)
set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
f2fs_put_page(node_page, 1);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 7bcbc6e9c40d..582373849332 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -261,13 +261,11 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
{
struct nat_entry *e;
- down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (!e) {
e = grab_nat_entry(nm_i, nid);
node_info_from_raw_nat(&e->ni, ne);
}
- up_write(&nm_i->nat_tree_lock);
}
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
@@ -298,8 +296,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
new_blkaddr == NULL_ADDR);
f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
new_blkaddr == NEW_ADDR);
- f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR &&
- nat_get_blkaddr(e) != NULL_ADDR &&
+ f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
new_blkaddr == NEW_ADDR);
/* increment version no as node is removed */
@@ -314,7 +311,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
/* change address */
nat_set_blkaddr(e, new_blkaddr);
- if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
+ if (!is_valid_data_blkaddr(sbi, new_blkaddr))
set_nat_flag(e, IS_CHECKPOINTED, false);
__set_nat_cache_dirty(nm_i, e);
@@ -379,6 +376,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+ down_write(&nm_i->nat_tree_lock);
+
/* Check current segment summary */
mutex_lock(&curseg->curseg_mutex);
i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
@@ -399,6 +398,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
cache:
/* cache nat entry */
cache_nat_entry(NM_I(sbi), nid, &ne);
+ up_write(&nm_i->nat_tree_lock);
}
/*
@@ -590,6 +590,7 @@ static void truncate_node(struct dnode_of_data *dn)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct node_info ni;
+ pgoff_t index;
get_node_info(sbi, dn->nid, &ni);
if (dn->inode->i_blocks == 0) {
@@ -613,10 +614,11 @@ invalidate:
clear_node_page_dirty(dn->node_page);
set_sbi_flag(sbi, SBI_IS_DIRTY);
+ index = dn->node_page->index;
f2fs_put_page(dn->node_page, 1);
invalidate_mapping_pages(NODE_MAPPING(sbi),
- dn->node_page->index, dn->node_page->index);
+ index, index);
dn->node_page = NULL;
trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
@@ -1341,6 +1343,12 @@ static int f2fs_write_node_page(struct page *page,
return 0;
}
+ if (__is_valid_data_blkaddr(ni.blk_addr) &&
+ !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) {
+ up_read(&sbi->node_write);
+ goto redirty_out;
+ }
+
set_page_writeback(page);
fio.blk_addr = ni.blk_addr;
write_node_page(nid, &fio);
@@ -1427,9 +1435,9 @@ static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- struct free_nid *i;
+ struct free_nid *i, *e;
struct nat_entry *ne;
- bool allocated = false;
+ int err = -EINVAL;
if (!available_free_memory(sbi, FREE_NIDS))
return -1;
@@ -1438,40 +1446,58 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
if (unlikely(nid == 0))
return 0;
- if (build) {
- /* do not add allocated nids */
- down_read(&nm_i->nat_tree_lock);
- ne = __lookup_nat_cache(nm_i, nid);
- if (ne &&
- (!get_nat_flag(ne, IS_CHECKPOINTED) ||
- nat_get_blkaddr(ne) != NULL_ADDR))
- allocated = true;
- up_read(&nm_i->nat_tree_lock);
- if (allocated)
- return 0;
- }
-
i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
i->nid = nid;
i->state = NID_NEW;
- if (radix_tree_preload(GFP_NOFS)) {
- kmem_cache_free(free_nid_slab, i);
- return 0;
- }
+ if (radix_tree_preload(GFP_NOFS))
+ goto err;
spin_lock(&nm_i->free_nid_list_lock);
- if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
- spin_unlock(&nm_i->free_nid_list_lock);
- radix_tree_preload_end();
- kmem_cache_free(free_nid_slab, i);
- return 0;
+
+ if (build) {
+ /*
+ * Thread A Thread B
+ * - f2fs_create
+ * - f2fs_new_inode
+ * - alloc_nid
+ * - __insert_nid_to_list(ALLOC_NID_LIST)
+ * - f2fs_balance_fs_bg
+ * - build_free_nids
+ * - __build_free_nids
+ * - scan_nat_page
+ * - add_free_nid
+ * - __lookup_nat_cache
+ * - f2fs_add_link
+ * - init_inode_metadata
+ * - new_inode_page
+ * - new_node_page
+ * - set_node_addr
+ * - alloc_nid_done
+ * - __remove_nid_from_list(ALLOC_NID_LIST)
+ * - __insert_nid_to_list(FREE_NID_LIST)
+ */
+ ne = __lookup_nat_cache(nm_i, nid);
+ if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
+ nat_get_blkaddr(ne) != NULL_ADDR))
+ goto err_out;
+
+ e = __lookup_free_nid_list(nm_i, nid);
+ if (e)
+ goto err_out;
}
+ if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i))
+ goto err_out;
+ err = 0;
list_add_tail(&i->list, &nm_i->free_nid_list);
nm_i->fcnt++;
+err_out:
spin_unlock(&nm_i->free_nid_list_lock);
radix_tree_preload_end();
- return 1;
+err:
+ if (err)
+ kmem_cache_free(free_nid_slab, i);
+ return !err;
}
static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
@@ -1532,6 +1558,8 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
META_NAT, true);
+ down_read(&nm_i->nat_tree_lock);
+
while (1) {
struct page *page = get_current_nat_page(sbi, nid);
@@ -1560,6 +1588,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
remove_free_nid(nm_i, nid);
}
mutex_unlock(&curseg->curseg_mutex);
+ up_read(&nm_i->nat_tree_lock);
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
nm_i->ra_nid_pages, META_NAT, false);
@@ -1842,14 +1871,12 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
raw_ne = nat_in_journal(sum, i);
- down_write(&nm_i->nat_tree_lock);
ne = __lookup_nat_cache(nm_i, nid);
if (!ne) {
ne = grab_nat_entry(nm_i, nid);
node_info_from_raw_nat(&ne->ni, &raw_ne);
}
__set_nat_cache_dirty(nm_i, ne);
- up_write(&nm_i->nat_tree_lock);
}
update_nats_in_cursum(sum, -i);
mutex_unlock(&curseg->curseg_mutex);
@@ -1883,7 +1910,6 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
struct f2fs_nat_block *nat_blk;
struct nat_entry *ne, *cur;
struct page *page = NULL;
- struct f2fs_nm_info *nm_i = NM_I(sbi);
/*
* there are two steps to flush nat entries:
@@ -1920,12 +1946,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
raw_ne = &nat_blk->entries[nid - start_nid];
}
raw_nat_from_node_info(raw_ne, &ne->ni);
-
- down_write(&NM_I(sbi)->nat_tree_lock);
nat_reset_flag(ne);
__clear_nat_cache_dirty(NM_I(sbi), ne);
- up_write(&NM_I(sbi)->nat_tree_lock);
-
if (nat_get_blkaddr(ne) == NULL_ADDR)
add_free_nid(sbi, nid, false);
}
@@ -1937,9 +1959,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
f2fs_bug_on(sbi, set->entry_cnt);
- down_write(&nm_i->nat_tree_lock);
radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
- up_write(&nm_i->nat_tree_lock);
kmem_cache_free(nat_entry_set_slab, set);
}
@@ -1959,6 +1979,9 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
if (!nm_i->dirty_nat_cnt)
return;
+
+ down_write(&nm_i->nat_tree_lock);
+
/*
* if there are no enough space in journal to store dirty nat
* entries, remove all entries from journal and merge them
@@ -1967,7 +1990,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
remove_nats_in_journal(sbi);
- down_write(&nm_i->nat_tree_lock);
while ((found = __gang_lookup_nat_set(nm_i,
set_idx, SETVEC_SIZE, setvec))) {
unsigned idx;
@@ -1976,12 +1998,13 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
__adjust_nat_entry_set(setvec[idx], &sets,
MAX_NAT_JENTRIES(sum));
}
- up_write(&nm_i->nat_tree_lock);
/* flush dirty nats in nat entry set */
list_for_each_entry_safe(set, tmp, &sets, set_list)
__flush_nat_entry_set(sbi, set);
+ up_write(&nm_i->nat_tree_lock);
+
f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index e4fffd2d98c4..0d6f0e3dc655 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -212,6 +212,37 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
f2fs_change_bit(block_off, nm_i->nat_bitmap);
}
+static inline nid_t ino_of_node(struct page *node_page)
+{
+ struct f2fs_node *rn = F2FS_NODE(node_page);
+ return le32_to_cpu(rn->footer.ino);
+}
+
+static inline nid_t nid_of_node(struct page *node_page)
+{
+ struct f2fs_node *rn = F2FS_NODE(node_page);
+ return le32_to_cpu(rn->footer.nid);
+}
+
+static inline unsigned int ofs_of_node(struct page *node_page)
+{
+ struct f2fs_node *rn = F2FS_NODE(node_page);
+ unsigned flag = le32_to_cpu(rn->footer.flag);
+ return flag >> OFFSET_BIT_SHIFT;
+}
+
+static inline __u64 cpver_of_node(struct page *node_page)
+{
+ struct f2fs_node *rn = F2FS_NODE(node_page);
+ return le64_to_cpu(rn->footer.cp_ver);
+}
+
+static inline block_t next_blkaddr_of_node(struct page *node_page)
+{
+ struct f2fs_node *rn = F2FS_NODE(node_page);
+ return le32_to_cpu(rn->footer.next_blkaddr);
+}
+
static inline void fill_node_footer(struct page *page, nid_t nid,
nid_t ino, unsigned int ofs, bool reset)
{
@@ -242,40 +273,30 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
struct f2fs_node *rn = F2FS_NODE(page);
+ size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
+ __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
- rn->footer.cp_ver = ckpt->checkpoint_ver;
+ if (is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
+ __u64 crc = le32_to_cpu(*((__le32 *)
+ ((unsigned char *)ckpt + crc_offset)));
+ cp_ver |= (crc << 32);
+ }
+ rn->footer.cp_ver = cpu_to_le64(cp_ver);
rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
}
-static inline nid_t ino_of_node(struct page *node_page)
-{
- struct f2fs_node *rn = F2FS_NODE(node_page);
- return le32_to_cpu(rn->footer.ino);
-}
-
-static inline nid_t nid_of_node(struct page *node_page)
+static inline bool is_recoverable_dnode(struct page *page)
{
- struct f2fs_node *rn = F2FS_NODE(node_page);
- return le32_to_cpu(rn->footer.nid);
-}
-
-static inline unsigned int ofs_of_node(struct page *node_page)
-{
- struct f2fs_node *rn = F2FS_NODE(node_page);
- unsigned flag = le32_to_cpu(rn->footer.flag);
- return flag >> OFFSET_BIT_SHIFT;
-}
-
-static inline unsigned long long cpver_of_node(struct page *node_page)
-{
- struct f2fs_node *rn = F2FS_NODE(node_page);
- return le64_to_cpu(rn->footer.cp_ver);
-}
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
+ size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
+ __u64 cp_ver = cur_cp_version(ckpt);
-static inline block_t next_blkaddr_of_node(struct page *node_page)
-{
- struct f2fs_node *rn = F2FS_NODE(node_page);
- return le32_to_cpu(rn->footer.next_blkaddr);
+ if (is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
+ __u64 crc = le32_to_cpu(*((__le32 *)
+ ((unsigned char *)ckpt + crc_offset)));
+ cp_ver |= (crc << 32);
+ }
+ return cpu_to_le64(cp_ver) == cpver_of_node(page);
}
/*
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index e32f349f341b..2878be3e448f 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -67,7 +67,30 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
return NULL;
}
-static int recover_dentry(struct inode *inode, struct page *ipage)
+static struct fsync_inode_entry *add_fsync_inode(struct list_head *head,
+ struct inode *inode)
+{
+ struct fsync_inode_entry *entry;
+
+ entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
+ if (!entry)
+ return NULL;
+
+ entry->inode = inode;
+ list_add_tail(&entry->list, head);
+
+ return entry;
+}
+
+static void del_fsync_inode(struct fsync_inode_entry *entry)
+{
+ iput(entry->inode);
+ list_del(&entry->list);
+ kmem_cache_free(fsync_entry_slab, entry);
+}
+
+static int recover_dentry(struct inode *inode, struct page *ipage,
+ struct list_head *dir_list)
{
struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
nid_t pino = le32_to_cpu(raw_inode->i_pino);
@@ -75,18 +98,29 @@ static int recover_dentry(struct inode *inode, struct page *ipage)
struct qstr name;
struct page *page;
struct inode *dir, *einode;
+ struct fsync_inode_entry *entry;
int err = 0;
- dir = f2fs_iget(inode->i_sb, pino);
- if (IS_ERR(dir)) {
- err = PTR_ERR(dir);
- goto out;
+ entry = get_fsync_inode(dir_list, pino);
+ if (!entry) {
+ dir = f2fs_iget(inode->i_sb, pino);
+ if (IS_ERR(dir)) {
+ err = PTR_ERR(dir);
+ goto out;
+ }
+
+ entry = add_fsync_inode(dir_list, dir);
+ if (!entry) {
+ err = -ENOMEM;
+ iput(dir);
+ goto out;
+ }
}
- if (file_enc_name(inode)) {
- iput(dir);
+ dir = entry->inode;
+
+ if (file_enc_name(inode))
return 0;
- }
name.len = le32_to_cpu(raw_inode->i_namelen);
name.name = raw_inode->i_name;
@@ -94,7 +128,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage)
if (unlikely(name.len > F2FS_NAME_LEN)) {
WARN_ON(1);
err = -ENAMETOOLONG;
- goto out_err;
+ goto out;
}
retry:
de = f2fs_find_entry(dir, &name, &page);
@@ -120,23 +154,12 @@ retry:
goto retry;
}
err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode);
- if (err)
- goto out_err;
-
- if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
- iput(dir);
- } else {
- add_dirty_dir_inode(dir);
- set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
- }
goto out;
out_unmap_put:
f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
-out_err:
- iput(dir);
out:
f2fs_msg(inode->i_sb, KERN_NOTICE,
"%s: ino = %x, name = %s, dir = %lx, err = %d",
@@ -170,8 +193,8 @@ static void recover_inode(struct inode *inode, struct page *page)
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
- unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
struct curseg_info *curseg;
+ struct inode *inode;
struct page *page = NULL;
block_t blkaddr;
int err = 0;
@@ -185,12 +208,12 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
while (1) {
struct fsync_inode_entry *entry;
- if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
return 0;
page = get_tmp_page(sbi, blkaddr);
- if (cp_ver != cpver_of_node(page))
+ if (!is_recoverable_dnode(page))
break;
if (!is_fsync_dnode(page))
@@ -204,27 +227,27 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
break;
}
- /* add this fsync inode to the list */
- entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
- if (!entry) {
- err = -ENOMEM;
- break;
- }
/*
* CP | dnode(F) | inode(DF)
* For this case, we should not give up now.
*/
- entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
- if (IS_ERR(entry->inode)) {
- err = PTR_ERR(entry->inode);
- kmem_cache_free(fsync_entry_slab, entry);
+ inode = f2fs_iget(sbi->sb, ino_of_node(page));
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
if (err == -ENOENT) {
err = 0;
goto next;
}
break;
}
- list_add_tail(&entry->list, head);
+
+ /* add this fsync inode to the list */
+ entry = add_fsync_inode(head, inode);
+ if (!entry) {
+ err = -ENOMEM;
+ iput(inode);
+ break;
+ }
}
entry->blkaddr = blkaddr;
@@ -248,11 +271,8 @@ static void destroy_fsync_dnodes(struct list_head *head)
{
struct fsync_inode_entry *entry, *tmp;
- list_for_each_entry_safe(entry, tmp, head, list) {
- iput(entry->inode);
- list_del(&entry->list);
- kmem_cache_free(fsync_entry_slab, entry);
- }
+ list_for_each_entry_safe(entry, tmp, head, list)
+ del_fsync_inode(entry);
}
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
@@ -423,7 +443,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
}
/* dest is valid block, try to recover from src to dest */
- if (is_valid_blkaddr(sbi, dest, META_POR)) {
+ if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
if (src == NULL_ADDR) {
err = reserve_new_block(&dn);
@@ -459,35 +479,34 @@ out:
return err;
}
-static int recover_data(struct f2fs_sb_info *sbi,
- struct list_head *head, int type)
+static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
+ struct list_head *dir_list)
{
- unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
struct curseg_info *curseg;
struct page *page = NULL;
int err = 0;
block_t blkaddr;
/* get node pages in the current segment */
- curseg = CURSEG_I(sbi, type);
+ curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
while (1) {
struct fsync_inode_entry *entry;
- if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
break;
ra_meta_pages_cond(sbi, blkaddr);
page = get_tmp_page(sbi, blkaddr);
- if (cp_ver != cpver_of_node(page)) {
+ if (!is_recoverable_dnode(page)) {
f2fs_put_page(page, 1);
break;
}
- entry = get_fsync_inode(head, ino_of_node(page));
+ entry = get_fsync_inode(inode_list, ino_of_node(page));
if (!entry)
goto next;
/*
@@ -498,7 +517,7 @@ static int recover_data(struct f2fs_sb_info *sbi,
if (entry->last_inode == blkaddr)
recover_inode(entry->inode, page);
if (entry->last_dentry == blkaddr) {
- err = recover_dentry(entry->inode, page);
+ err = recover_dentry(entry->inode, page, dir_list);
if (err) {
f2fs_put_page(page, 1);
break;
@@ -510,11 +529,8 @@ static int recover_data(struct f2fs_sb_info *sbi,
break;
}
- if (entry->blkaddr == blkaddr) {
- iput(entry->inode);
- list_del(&entry->list);
- kmem_cache_free(fsync_entry_slab, entry);
- }
+ if (entry->blkaddr == blkaddr)
+ del_fsync_inode(entry);
next:
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
@@ -525,12 +541,14 @@ next:
return err;
}
-int recover_fsync_data(struct f2fs_sb_info *sbi)
+int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
struct list_head inode_list;
+ struct list_head dir_list;
block_t blkaddr;
int err;
+ int ret = 0;
bool need_writecp = false;
fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
@@ -539,6 +557,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
return -ENOMEM;
INIT_LIST_HEAD(&inode_list);
+ INIT_LIST_HEAD(&dir_list);
/* prevent checkpoint */
mutex_lock(&sbi->cp_mutex);
@@ -547,21 +566,22 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
/* step #1: find fsynced inode numbers */
err = find_fsync_dnodes(sbi, &inode_list);
- if (err)
+ if (err || list_empty(&inode_list))
goto out;
- if (list_empty(&inode_list))
+ if (check_only) {
+ ret = 1;
goto out;
+ }
need_writecp = true;
/* step #2: recover data */
- err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
+ err = recover_data(sbi, &inode_list, &dir_list);
if (!err)
f2fs_bug_on(sbi, !list_empty(&inode_list));
out:
destroy_fsync_dnodes(&inode_list);
- kmem_cache_destroy(fsync_entry_slab);
/* truncate meta pages to be used by the recovery */
truncate_inode_pages_range(META_MAPPING(sbi),
@@ -573,31 +593,20 @@ out:
}
clear_sbi_flag(sbi, SBI_POR_DOING);
- if (err) {
- bool invalidate = false;
-
- if (discard_next_dnode(sbi, blkaddr))
- invalidate = true;
-
- /* Flush all the NAT/SIT pages */
- while (get_pages(sbi, F2FS_DIRTY_META))
- sync_meta_pages(sbi, META, LONG_MAX);
+ if (err)
+ set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
+ mutex_unlock(&sbi->cp_mutex);
- /* invalidate temporary meta page */
- if (invalidate)
- invalidate_mapping_pages(META_MAPPING(sbi),
- blkaddr, blkaddr);
+ /* let's drop all the directory inodes for clean checkpoint */
+ destroy_fsync_dnodes(&dir_list);
- set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
- mutex_unlock(&sbi->cp_mutex);
- } else if (need_writecp) {
+ if (!err && need_writecp) {
struct cp_control cpc = {
.reason = CP_RECOVERY,
};
- mutex_unlock(&sbi->cp_mutex);
write_checkpoint(sbi, &cpc);
- } else {
- mutex_unlock(&sbi->cp_mutex);
}
- return err;
+
+ kmem_cache_destroy(fsync_entry_slab);
+ return ret ? ret: err;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 2bba0c4ef4b7..6802cd754eda 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -398,6 +398,9 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
init_waitqueue_head(&fcc->flush_wait_queue);
init_llist_head(&fcc->issue_list);
SM_I(sbi)->cmd_control_info = fcc;
+ if (!test_opt(sbi, FLUSH_MERGE))
+ return err;
+
fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
@@ -516,28 +519,6 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
}
-bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
-{
- int err = -ENOTSUPP;
-
- if (test_opt(sbi, DISCARD)) {
- struct seg_entry *se = get_seg_entry(sbi,
- GET_SEGNO(sbi, blkaddr));
- unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
-
- if (f2fs_test_bit(offset, se->discard_map))
- return false;
-
- err = f2fs_issue_discard(sbi, blkaddr, 1);
- }
-
- if (err) {
- update_meta_page(sbi, NULL, blkaddr);
- return true;
- }
- return false;
-}
-
static void __add_discard_entry(struct f2fs_sb_info *sbi,
struct cp_control *cpc, struct seg_entry *se,
unsigned int start, unsigned int end)
@@ -771,7 +752,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
struct seg_entry *se;
bool is_cp = false;
- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
+ if (!is_valid_data_blkaddr(sbi, blkaddr))
return true;
mutex_lock(&sit_i->sentry_lock);
@@ -1485,7 +1466,7 @@ void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
{
struct page *cpage;
- if (blkaddr == NEW_ADDR)
+ if (!is_valid_data_blkaddr(sbi, blkaddr))
return;
f2fs_bug_on(sbi, blkaddr == NULL_ADDR);
@@ -2120,7 +2101,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
return restore_curseg_summaries(sbi);
}
-static void build_sit_entries(struct f2fs_sb_info *sbi)
+static int build_sit_entries(struct f2fs_sb_info *sbi)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
@@ -2129,6 +2110,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
unsigned int i, start, end;
unsigned int readed, start_blk = 0;
int nrpages = MAX_BIO_BLOCKS(sbi);
+ int err = 0;
do {
readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true);
@@ -2142,36 +2124,62 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
struct f2fs_sit_entry sit;
struct page *page;
- mutex_lock(&curseg->curseg_mutex);
- for (i = 0; i < sits_in_cursum(sum); i++) {
- if (le32_to_cpu(segno_in_journal(sum, i))
- == start) {
- sit = sit_in_journal(sum, i);
- mutex_unlock(&curseg->curseg_mutex);
- goto got_it;
- }
- }
- mutex_unlock(&curseg->curseg_mutex);
-
page = get_current_sit_page(sbi, start);
sit_blk = (struct f2fs_sit_block *)page_address(page);
sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
f2fs_put_page(page, 1);
-got_it:
- check_block_count(sbi, start, &sit);
+
+ err = check_block_count(sbi, start, &sit);
+ if (err)
+ return err;
seg_info_from_raw_sit(se, &sit);
/* build discard map only one time */
memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE);
sbi->discard_blks += sbi->blocks_per_seg - se->valid_blocks;
- if (sbi->segs_per_sec > 1) {
- struct sec_entry *e = get_sec_entry(sbi, start);
- e->valid_blocks += se->valid_blocks;
- }
+ if (sbi->segs_per_sec > 1)
+ get_sec_entry(sbi, start)->valid_blocks +=
+ se->valid_blocks;
}
start_blk += readed;
} while (start_blk < sit_blk_cnt);
+
+ mutex_lock(&curseg->curseg_mutex);
+ for (i = 0; i < sits_in_cursum(sum); i++) {
+ struct f2fs_sit_entry sit;
+ struct seg_entry *se;
+ unsigned int old_valid_blocks;
+
+ start = le32_to_cpu(segno_in_journal(sum, i));
+ if (start >= MAIN_SEGS(sbi)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong journal entry on segno %u",
+ start);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ err = -EINVAL;
+ break;
+ }
+
+ se = &sit_i->sentries[start];
+ sit = sit_in_journal(sum, i);
+
+ old_valid_blocks = se->valid_blocks;
+
+ err = check_block_count(sbi, start, &sit);
+ if (err)
+ break;
+ seg_info_from_raw_sit(se, &sit);
+
+ memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE);
+ sbi->discard_blks += old_valid_blocks - se->valid_blocks;
+
+ if (sbi->segs_per_sec > 1)
+ get_sec_entry(sbi, start)->valid_blocks +=
+ se->valid_blocks - old_valid_blocks;
+ }
+ mutex_unlock(&curseg->curseg_mutex);
+ return err;
}
static void init_free_segmap(struct f2fs_sb_info *sbi)
@@ -2316,7 +2324,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&sm_info->sit_entry_set);
- if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
+ if (!f2fs_readonly(sbi->sb)) {
err = create_flush_cmd_control(sbi);
if (err)
return err;
@@ -2333,7 +2341,9 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
return err;
/* reinit free segmap based on SIT */
- build_sit_entries(sbi);
+ err = build_sit_entries(sbi);
+ if (err)
+ return err;
init_free_segmap(sbi);
err = build_dirty_segmap(sbi);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index bfa1d31f79aa..08b08ae6ba9d 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -17,6 +17,8 @@
#define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */
+#define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */
+
/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno)
@@ -46,13 +48,19 @@
(secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
sbi->segs_per_sec)) \
-#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr)
-#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr)
+#define MAIN_BLKADDR(sbi) \
+ (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr))
+#define SEG0_BLKADDR(sbi) \
+ (SM_I(sbi) ? SM_I(sbi)->seg0_blkaddr : \
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment0_blkaddr))
#define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments)
#define MAIN_SECS(sbi) (sbi->total_sections)
-#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count)
+#define TOTAL_SEGS(sbi) \
+ (SM_I(sbi) ? SM_I(sbi)->segment_count : \
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count))
#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg)
#define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi))
@@ -72,7 +80,7 @@
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1))
#define GET_SEGNO(sbi, blk_addr) \
- (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \
+ ((!is_valid_data_blkaddr(sbi, blk_addr)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define GET_SECNO(sbi, segno) \
@@ -574,16 +582,20 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1);
}
-static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
+static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
{
- f2fs_bug_on(sbi, blk_addr < SEG0_BLKADDR(sbi)
- || blk_addr >= MAX_BLKADDR(sbi));
+ struct f2fs_sb_info *sbi = fio->sbi;
+
+ if (__is_meta_io(fio))
+ verify_blkaddr(sbi, blk_addr, META_GENERIC);
+ else
+ verify_blkaddr(sbi, blk_addr, DATA_GENERIC);
}
/*
* Summary block is always treated as an invalid block
*/
-static inline void check_block_count(struct f2fs_sb_info *sbi,
+static inline int check_block_count(struct f2fs_sb_info *sbi,
int segno, struct f2fs_sit_entry *raw_sit)
{
#ifdef CONFIG_F2FS_CHECK_FS
@@ -605,11 +617,25 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
cur_pos = next_pos;
is_valid = !is_valid;
} while (cur_pos < sbi->blocks_per_seg);
- BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);
+
+ if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Mismatch valid blocks %d vs. %d",
+ GET_SIT_VBLOCKS(raw_sit), valid_blocks);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ return -EINVAL;
+ }
#endif
/* check segment usage, and check boundary of a given segment number */
- f2fs_bug_on(sbi, GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
- || segno > TOTAL_SEGS(sbi) - 1);
+ if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
+ || segno > TOTAL_SEGS(sbi) - 1)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong valid blocks %d or segno %u",
+ GET_SIT_VBLOCKS(raw_sit), segno);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ return -EINVAL;
+ }
+ return 0;
}
static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 2ffc53d0c9c7..dbd7adff8b5a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -994,6 +994,8 @@ static inline bool sanity_check_area_boundary(struct super_block *sb,
static int sanity_check_raw_super(struct super_block *sb,
struct f2fs_super_block *raw_super)
{
+ block_t segment_count, segs_per_sec, secs_per_zone;
+ block_t total_sections, blocks_per_seg;
unsigned int blocksize;
if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
@@ -1047,6 +1049,68 @@ static int sanity_check_raw_super(struct super_block *sb,
return 1;
}
+ segment_count = le32_to_cpu(raw_super->segment_count);
+ segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
+ secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
+ total_sections = le32_to_cpu(raw_super->section_count);
+
+ /* blocks_per_seg should be 512, given the above check */
+ blocks_per_seg = 1 << le32_to_cpu(raw_super->log_blocks_per_seg);
+
+ if (segment_count > F2FS_MAX_SEGMENT ||
+ segment_count < F2FS_MIN_SEGMENTS) {
+ f2fs_msg(sb, KERN_INFO,
+ "Invalid segment count (%u)",
+ segment_count);
+ return 1;
+ }
+
+ if (total_sections > segment_count ||
+ total_sections < F2FS_MIN_SEGMENTS ||
+ segs_per_sec > segment_count || !segs_per_sec) {
+ f2fs_msg(sb, KERN_INFO,
+ "Invalid segment/section count (%u, %u x %u)",
+ segment_count, total_sections, segs_per_sec);
+ return 1;
+ }
+
+ if ((segment_count / segs_per_sec) < total_sections) {
+ f2fs_msg(sb, KERN_INFO,
+ "Small segment_count (%u < %u * %u)",
+ segment_count, segs_per_sec, total_sections);
+ return 1;
+ }
+
+ if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong segment_count / block_count (%u > %llu)",
+ segment_count, le64_to_cpu(raw_super->block_count));
+ return 1;
+ }
+
+ if (secs_per_zone > total_sections || !secs_per_zone) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong secs_per_zone / total_sections (%u, %u)",
+ secs_per_zone, total_sections);
+ return 1;
+ }
+ if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION) {
+ f2fs_msg(sb, KERN_INFO,
+ "Corrupted extension count (%u > %u)",
+ le32_to_cpu(raw_super->extension_count),
+ F2FS_MAX_EXTENSION);
+ return 1;
+ }
+
+ if (le32_to_cpu(raw_super->cp_payload) >
+ (blocks_per_seg - F2FS_CP_PACKS)) {
+ f2fs_msg(sb, KERN_INFO,
+ "Insane cp_payload (%u > %u)",
+ le32_to_cpu(raw_super->cp_payload),
+ blocks_per_seg - F2FS_CP_PACKS);
+ return 1;
+ }
+
/* check reserved ino info */
if (le32_to_cpu(raw_super->node_ino) != 1 ||
le32_to_cpu(raw_super->meta_ino) != 2 ||
@@ -1059,13 +1123,6 @@ static int sanity_check_raw_super(struct super_block *sb,
return 1;
}
- if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid segment count (%u)",
- le32_to_cpu(raw_super->segment_count));
- return 1;
- }
-
/* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
if (sanity_check_area_boundary(sb, raw_super))
return 1;
@@ -1073,15 +1130,19 @@ static int sanity_check_raw_super(struct super_block *sb,
return 0;
}
-static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
+int sanity_check_ckpt(struct f2fs_sb_info *sbi)
{
unsigned int total, fsmeta;
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ unsigned int ovp_segments, reserved_segments;
unsigned int main_segs, blocks_per_seg;
unsigned int sit_segs, nat_segs;
unsigned int sit_bitmap_size, nat_bitmap_size;
unsigned int log_blocks_per_seg;
+ unsigned int segment_count_main;
+ unsigned int cp_pack_start_sum, cp_payload;
+ block_t user_block_count;
int i;
total = le32_to_cpu(raw_super->segment_count);
@@ -1096,6 +1157,26 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
if (unlikely(fsmeta >= total))
return 1;
+ ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
+ reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
+
+ if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
+ ovp_segments == 0 || reserved_segments == 0)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong layout: check mkfs.f2fs version");
+ return 1;
+ }
+
+ user_block_count = le64_to_cpu(ckpt->user_block_count);
+ segment_count_main = le32_to_cpu(raw_super->segment_count_main);
+ log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
+ if (!user_block_count || user_block_count >=
+ segment_count_main << log_blocks_per_seg) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong user_block_count: %u", user_block_count);
+ return 1;
+ }
+
main_segs = le32_to_cpu(raw_super->segment_count_main);
blocks_per_seg = sbi->blocks_per_seg;
@@ -1112,7 +1193,6 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize);
- log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 ||
nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) {
@@ -1122,6 +1202,17 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
return 1;
}
+ cp_pack_start_sum = __start_sum_addr(sbi);
+ cp_payload = __cp_payload(sbi);
+ if (cp_pack_start_sum < cp_payload + 1 ||
+ cp_pack_start_sum > blocks_per_seg - 1 -
+ NR_CURSEG_TYPE) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong cp_pack_start_sum: %u",
+ cp_pack_start_sum);
+ return 1;
+ }
+
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
return 1;
@@ -1358,13 +1449,6 @@ try_onemore:
goto free_meta_inode;
}
- /* sanity checking of checkpoint */
- err = -EINVAL;
- if (sanity_check_ckpt(sbi)) {
- f2fs_msg(sb, KERN_ERR, "Invalid F2FS checkpoint");
- goto free_cp;
- }
-
sbi->total_valid_node_count =
le32_to_cpu(sbi->ckpt->valid_node_count);
sbi->total_valid_inode_count =
@@ -1464,14 +1548,27 @@ try_onemore:
if (need_fsck)
set_sbi_flag(sbi, SBI_NEED_FSCK);
- err = recover_fsync_data(sbi);
- if (err) {
+ if (!retry)
+ goto skip_recovery;
+
+ err = recover_fsync_data(sbi, false);
+ if (err < 0) {
need_fsck = true;
f2fs_msg(sb, KERN_ERR,
"Cannot recover all fsync data errno=%ld", err);
goto free_kobj;
}
+ } else {
+ err = recover_fsync_data(sbi, true);
+
+ if (!f2fs_readonly(sb) && err > 0) {
+ err = -EINVAL;
+ f2fs_msg(sb, KERN_ERR,
+ "Need to recover fsync data");
+ goto free_kobj;
+ }
}
+skip_recovery:
/* recover_fsync_data() cleared this already */
clear_sbi_flag(sbi, SBI_POR_DOING);
@@ -1517,7 +1614,6 @@ free_nm:
destroy_node_manager(sbi);
free_sm:
destroy_segment_manager(sbi);
-free_cp:
kfree(sbi->ckpt);
free_meta_inode:
make_bad_inode(sbi->meta_inode);
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 7a182c87f378..ab1d7f35f6c2 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -715,6 +715,9 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob
if (awaken)
wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING);
+ if (test_and_clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags))
+ wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP);
+
/* Prevent a race with our last child, which has to signal EV_CLEARED
* before dropping our spinlock.
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e566652ac922..341196338e48 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1741,7 +1741,6 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
req->in.h.nodeid = outarg->nodeid;
req->in.numargs = 2;
req->in.argpages = 1;
- req->page_descs[0].offset = offset;
req->end = fuse_retrieve_end;
index = outarg->offset >> PAGE_CACHE_SHIFT;
@@ -1756,6 +1755,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
req->pages[req->num_pages] = page;
+ req->page_descs[req->num_pages].offset = offset;
req->page_descs[req->num_pages].length = this_num;
req->num_pages++;
@@ -2074,10 +2074,13 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
ret = fuse_dev_do_write(fud, &cs, len);
+ pipe_lock(pipe);
for (idx = 0; idx < nbuf; idx++) {
struct pipe_buffer *buf = &bufs[idx];
buf->ops->release(pipe, buf);
}
+ pipe_unlock(pipe);
+
out:
kfree(bufs);
return ret;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 7014318f6d18..d40c2451487c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1784,7 +1784,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
spin_unlock(&fc->lock);
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
- dec_zone_page_state(page, NR_WRITEBACK_TEMP);
+ dec_zone_page_state(new_req->pages[0], NR_WRITEBACK_TEMP);
wb_writeout_inc(&bdi->wb);
fuse_writepage_free(fc, new_req);
fuse_request_free(new_req);
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 1ab19e660e69..1ff5774a5382 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -328,13 +328,14 @@ void hfs_bmap_free(struct hfs_bnode *node)
nidx -= len * 8;
i = node->next;
- hfs_bnode_put(node);
if (!i) {
/* panic */;
pr_crit("unable to free bnode %u. bmap not found!\n",
node->this);
+ hfs_bnode_put(node);
return;
}
+ hfs_bnode_put(node);
node = hfs_bnode_find(tree, i);
if (IS_ERR(node))
return;
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 3345c7553edc..7adc8a327e03 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -453,14 +453,15 @@ void hfs_bmap_free(struct hfs_bnode *node)
nidx -= len * 8;
i = node->next;
- hfs_bnode_put(node);
if (!i) {
/* panic */;
pr_crit("unable to free bnode %u. "
"bmap not found!\n",
node->this);
+ hfs_bnode_put(node);
return;
}
+ hfs_bnode_put(node);
node = hfs_bnode_find(tree, i);
if (IS_ERR(node))
return;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a17da8b57fc6..cefae2350da5 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -118,6 +118,16 @@ static void huge_pagevec_release(struct pagevec *pvec)
pagevec_reinit(pvec);
}
+/*
+ * Mask used when checking the page offset value passed in via system
+ * calls. This value will be converted to a loff_t which is signed.
+ * Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the
+ * value. The extra bit (- 1 in the shift value) is to take the sign
+ * bit into account.
+ */
+#define PGOFF_LOFFT_MAX \
+ (((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))
+
static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file_inode(file);
@@ -136,17 +146,31 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
vma->vm_ops = &hugetlb_vm_ops;
+ /*
+ * page based offset in vm_pgoff could be sufficiently large to
+ * overflow a loff_t when converted to byte offset. This can
+ * only happen on architectures where sizeof(loff_t) ==
+ * sizeof(unsigned long). So, only check in those instances.
+ */
+ if (sizeof(unsigned long) == sizeof(loff_t)) {
+ if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
+ return -EINVAL;
+ }
+
+ /* must be huge page aligned */
if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
return -EINVAL;
vma_len = (loff_t)(vma->vm_end - vma->vm_start);
+ len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+ /* check for overflow */
+ if (len < vma_len)
+ return -EINVAL;
mutex_lock(&inode->i_mutex);
file_accessed(file);
ret = -ENOMEM;
- len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
-
if (hugetlb_reserve_pages(inode,
vma->vm_pgoff >> huge_page_order(h),
len >> huge_page_shift(h), vma,
@@ -155,7 +179,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
ret = 0;
if (vma->vm_flags & VM_WRITE && inode->i_size < len)
- inode->i_size = len;
+ i_size_write(inode, len);
out:
mutex_unlock(&inode->i_mutex);
@@ -845,6 +869,18 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
rc = migrate_huge_page_move_mapping(mapping, newpage, page);
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
+
+ /*
+ * page_private is subpool pointer in hugetlb pages. Transfer to
+ * new page. PagePrivate is not associated with page_private for
+ * hugetlb pages and can not be set here as only page_huge_active
+ * pages can be migrated.
+ */
+ if (page_private(page)) {
+ set_page_private(newpage, page_private(page));
+ set_page_private(page, 0);
+ }
+
migrate_page_copy(newpage, page);
return MIGRATEPAGE_SUCCESS;
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index bce343febb9e..c34433432d47 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1215,11 +1215,12 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
struct journal_head *jh;
char *committed_data = NULL;
- JBUFFER_TRACE(jh, "entry");
if (jbd2_write_access_granted(handle, bh, true))
return 0;
jh = jbd2_journal_add_journal_head(bh);
+ JBUFFER_TRACE(jh, "entry");
+
/*
* Do this first --- it can drop the journal lock, so we want to
* make sure that obtaining the committed_data is done
@@ -1336,15 +1337,17 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
if (is_handle_aborted(handle))
return -EROFS;
- if (!buffer_jbd(bh)) {
- ret = -EUCLEAN;
- goto out;
- }
+ if (!buffer_jbd(bh))
+ return -EUCLEAN;
+
/*
* We don't grab jh reference here since the buffer must be part
* of the running transaction.
*/
jh = bh2jh(bh);
+ jbd_debug(5, "journal_head %p\n", jh);
+ JBUFFER_TRACE(jh, "entry");
+
/*
* This and the following assertions are unreliable since we may see jh
* in inconsistent state unless we grab bh_state lock. But this is
@@ -1378,9 +1381,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
}
journal = transaction->t_journal;
- jbd_debug(5, "journal_head %p\n", jh);
- JBUFFER_TRACE(jh, "entry");
-
jbd_lock_bh_state(bh);
if (jh->b_modified == 0) {
@@ -1578,14 +1578,21 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
/* However, if the buffer is still owned by a prior
* (committing) transaction, we can't drop it yet... */
JBUFFER_TRACE(jh, "belongs to older transaction");
- /* ... but we CAN drop it from the new transaction if we
- * have also modified it since the original commit. */
+ /* ... but we CAN drop it from the new transaction through
+ * marking the buffer as freed and set j_next_transaction to
+ * the new transaction, so that not only the commit code
+ * knows it should clear dirty bits when it is done with the
+ * buffer, but also the buffer can be checkpointed only
+ * after the new transaction commits. */
- if (jh->b_next_transaction) {
- J_ASSERT(jh->b_next_transaction == transaction);
+ set_buffer_freed(bh);
+
+ if (!jh->b_next_transaction) {
spin_lock(&journal->j_list_lock);
- jh->b_next_transaction = NULL;
+ jh->b_next_transaction = transaction;
spin_unlock(&journal->j_list_lock);
+ } else {
+ J_ASSERT(jh->b_next_transaction == transaction);
/*
* only drop a reference if this transaction modified
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 1544f530ccd0..023e7f32ee1b 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -101,7 +101,8 @@ static int jffs2_sync_fs(struct super_block *sb, int wait)
struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
- cancel_delayed_work_sync(&c->wbuf_dwork);
+ if (jffs2_is_writebuffered(c))
+ cancel_delayed_work_sync(&c->wbuf_dwork);
#endif
mutex_lock(&c->alloc_sem);
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 0a3f9b594602..37779ed3f790 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -233,7 +233,7 @@ ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg)
len = strlen(server->nls_vol->charset);
if (len > NCP_IOCSNAME_LEN)
len = NCP_IOCSNAME_LEN;
- strncpy(user.codepage, server->nls_vol->charset, len);
+ strscpy(user.codepage, server->nls_vol->charset, NCP_IOCSNAME_LEN);
user.codepage[len] = 0;
}
@@ -243,7 +243,7 @@ ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg)
len = strlen(server->nls_io->charset);
if (len > NCP_IOCSNAME_LEN)
len = NCP_IOCSNAME_LEN;
- strncpy(user.iocharset, server->nls_io->charset, len);
+ strscpy(user.iocharset, server->nls_io->charset, NCP_IOCSNAME_LEN);
user.iocharset[len] = 0;
}
mutex_unlock(&server->root_setup_lock);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 211440722e24..88cb8e0d6014 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -670,6 +670,10 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
req = nfs_list_entry(reqs.next);
nfs_direct_setup_mirroring(dreq, &desc, req);
+ if (desc.pg_error < 0) {
+ list_splice_init(&reqs, &failed);
+ goto out_failed;
+ }
list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
if (!nfs_pageio_add_request(&desc, req)) {
@@ -677,13 +681,17 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
nfs_list_add_request(req, &failed);
spin_lock(cinfo.lock);
dreq->flags = 0;
- dreq->error = -EIO;
+ if (desc.pg_error < 0)
+ dreq->error = desc.pg_error;
+ else
+ dreq->error = -EIO;
spin_unlock(cinfo.lock);
}
nfs_release_request(req);
}
nfs_pageio_complete(&desc);
+out_failed:
while (!list_empty(&failed)) {
req = nfs_list_entry(failed.next);
nfs_list_remove_request(req);
@@ -898,6 +906,11 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
}
nfs_direct_setup_mirroring(dreq, &desc, req);
+ if (desc.pg_error < 0) {
+ nfs_free_request(req);
+ result = desc.pg_error;
+ break;
+ }
nfs_lock_request(req);
req->wb_index = pos >> PAGE_SHIFT;
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index fd8da630fd22..8e268965c96d 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -882,13 +882,19 @@ static void
filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
- if (!pgio->pg_lseg)
+ if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
IOMODE_READ,
GFP_KERNEL);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ return;
+ }
+ }
/* If no lseg, fall back to read through mds */
if (pgio->pg_lseg == NULL)
nfs_pageio_reset_read_mds(pgio);
@@ -901,13 +907,20 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_commit_info cinfo;
int status;
- if (!pgio->pg_lseg)
+ if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
IOMODE_RW,
GFP_NOFS);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ return;
+ }
+ }
+
/* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL)
goto out_mds;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index c8e90152b61b..6506775575aa 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -786,13 +786,19 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
int ds_idx;
/* Use full layout for now */
- if (!pgio->pg_lseg)
+ if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
IOMODE_READ,
GFP_KERNEL);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ return;
+ }
+ }
/* If no lseg, fall back to read through mds */
if (pgio->pg_lseg == NULL)
goto out_mds;
@@ -826,13 +832,19 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
int i;
int status;
- if (!pgio->pg_lseg)
+ if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
IOMODE_RW,
GFP_NOFS);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ return;
+ }
+ }
/* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL)
goto out_mds;
@@ -868,18 +880,25 @@ static unsigned int
ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
- if (!pgio->pg_lseg)
+ if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
IOMODE_RW,
GFP_NOFS);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ goto out;
+ }
+ }
if (pgio->pg_lseg)
return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg);
/* no lseg means that pnfs is not in use, so no mirroring here */
nfs_pageio_reset_write_mds(pgio);
+out:
return 1;
}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 4bdc2fc86280..8a2077408ab0 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -872,6 +872,9 @@ static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio,
mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req);
+ if (pgio->pg_error < 0)
+ return pgio->pg_error;
+
if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX)
return -EINVAL;
@@ -980,6 +983,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
} else {
if (desc->pg_ops->pg_init)
desc->pg_ops->pg_init(desc, req);
+ if (desc->pg_error < 0)
+ return 0;
mirror->pg_base = req->wb_pgbase;
}
if (!nfs_can_coalesce_requests(prev, req, desc))
@@ -1102,7 +1107,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
struct nfs_page *req;
req = list_first_entry(&head, struct nfs_page, wb_list);
- nfs_list_remove_request(req);
if (__nfs_pageio_add_request(desc, req))
continue;
if (desc->pg_error < 0) {
@@ -1145,6 +1149,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
bytes = req->wb_bytes;
nfs_pageio_setup_mirroring(desc, req);
+ if (desc->pg_error < 0)
+ return 0;
for (midx = 0; midx < desc->pg_mirror_count; midx++) {
if (midx) {
@@ -1196,7 +1202,7 @@ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc,
desc->pg_mirror_idx = mirror_idx;
for (;;) {
nfs_pageio_doio(desc);
- if (!mirror->pg_recoalesce)
+ if (desc->pg_error < 0 || !mirror->pg_recoalesce)
break;
if (!nfs_do_recoalesce(desc))
break;
@@ -1230,7 +1236,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
nfs_pageio_complete(desc);
if (!list_empty(&failed)) {
list_move(&failed, &hdr->pages);
- return -EIO;
+ return desc->pg_error < 0 ? desc->pg_error : -EIO;
}
return 0;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c8e75e5e6a67..d34fb0feb5c2 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -909,14 +909,15 @@ send_layoutget(struct pnfs_layout_hdr *lo,
if (IS_ERR(lseg)) {
switch (PTR_ERR(lseg)) {
- case -ENOMEM:
case -ERESTARTSYS:
+ case -EIO:
+ case -ENOSPC:
+ case -EROFS:
+ case -E2BIG:
break;
default:
- /* remember that LAYOUTGET failed and suspend trying */
- pnfs_layout_io_set_failed(lo, range->iomode);
+ return NULL;
}
- return NULL;
} else
pnfs_layout_clear_fail_bit(lo,
pnfs_iomode_to_fail_bit(range->iomode));
@@ -1625,7 +1626,7 @@ out:
"(%s, offset: %llu, length: %llu)\n",
__func__, ino->i_sb->s_id,
(unsigned long long)NFS_FILEID(ino),
- lseg == NULL ? "not found" : "found",
+ IS_ERR_OR_NULL(lseg) ? "not found" : "found",
iomode==IOMODE_RW ? "read/write" : "read-only",
(unsigned long long)pos,
(unsigned long long)count);
@@ -1804,6 +1805,11 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
rd_size,
IOMODE_READ,
GFP_KERNEL);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ return;
+ }
}
/* If no lseg, fall back to read through mds */
if (pgio->pg_lseg == NULL)
@@ -1816,13 +1822,19 @@ void
pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req, u64 wb_size)
{
- if (pgio->pg_lseg == NULL)
+ if (pgio->pg_lseg == NULL) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
req_offset(req),
wb_size,
IOMODE_RW,
GFP_NOFS);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ return;
+ }
+ }
/* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL)
nfs_pageio_reset_write_mds(pgio);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 0a5e33f33b5c..0bb580174cb3 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -115,7 +115,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
pgm = &pgio.pg_mirrors[0];
NFS_I(inode)->read_io += pgm->pg_bytes_written;
- return 0;
+ return pgio.pg_error < 0 ? pgio.pg_error : 0;
}
static void nfs_readpage_release(struct nfs_page *req)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 62f358f67764..9b42139a479b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1877,6 +1877,11 @@ static int nfs_parse_devname(const char *dev_name,
size_t len;
char *end;
+ if (unlikely(!dev_name || !*dev_name)) {
+ dfprintk(MOUNT, "NFS: device name not specified\n");
+ return -EINVAL;
+ }
+
/* Is the host name protected with square brakcets? */
if (*dev_name == '[') {
end = strchr(++dev_name, ']');
@@ -2376,8 +2381,7 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n
goto Ebusy;
if (a->acdirmax != b->acdirmax)
goto Ebusy;
- if (b->auth_info.flavor_len > 0 &&
- clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
+ if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
goto Ebusy;
return 1;
Ebusy:
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 7b755b7f785c..91146f025769 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -430,8 +430,19 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
&resp->common, nfs3svc_encode_entry);
memcpy(resp->verf, argp->verf, 8);
resp->count = resp->buffer - argp->buffer;
- if (resp->offset)
- xdr_encode_hyper(resp->offset, argp->cookie);
+ if (resp->offset) {
+ loff_t offset = argp->cookie;
+
+ if (unlikely(resp->offset1)) {
+ /* we ended up with offset on a page boundary */
+ *resp->offset = htonl(offset >> 32);
+ *resp->offset1 = htonl(offset & 0xffffffff);
+ resp->offset1 = NULL;
+ } else {
+ xdr_encode_hyper(resp->offset, offset);
+ }
+ resp->offset = NULL;
+ }
RETURN_STATUS(nfserr);
}
@@ -499,6 +510,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
} else {
xdr_encode_hyper(resp->offset, offset);
}
+ resp->offset = NULL;
}
RETURN_STATUS(nfserr);
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 7162ab7bc093..d4fa7fbc37dc 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -898,6 +898,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
} else {
xdr_encode_hyper(cd->offset, offset64);
}
+ cd->offset = NULL;
}
/*
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 9690cb4dd588..0cd57db5c5af 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1106,6 +1106,8 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
case 'Y':
case 'y':
case '1':
+ if (!nn->nfsd_serv)
+ return -EBUSY;
nfsd4_end_grace(nn);
break;
default:
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 272269f1c310..9ee8bcfbf00f 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -146,7 +146,6 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
BUG();
}
- clear_buffer_uptodate(bh);
get_bh(bh); /* for end_buffer_read_sync() */
bh->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh);
@@ -300,7 +299,6 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
continue;
}
- clear_buffer_uptodate(bh);
get_bh(bh); /* for end_buffer_read_sync() */
if (validate)
set_buffer_needs_validate(bh);
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 827fc9809bc2..3494e220b510 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -125,10 +125,10 @@ check_err:
check_gen:
if (handle->ih_generation != inode->i_generation) {
- iput(inode);
trace_ocfs2_get_dentry_generation((unsigned long long)blkno,
handle->ih_generation,
inode->i_generation);
+ iput(inode);
result = ERR_PTR(-ESTALE);
goto bail;
}
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 0a4457fb0711..85111d740c9d 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -345,13 +345,18 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
if (num_used
|| alloc->id1.bitmap1.i_used
|| alloc->id1.bitmap1.i_total
- || la->la_bm_off)
- mlog(ML_ERROR, "Local alloc hasn't been recovered!\n"
+ || la->la_bm_off) {
+ mlog(ML_ERROR, "inconsistent detected, clean journal with"
+ " unrecovered local alloc, please run fsck.ocfs2!\n"
"found = %u, set = %u, taken = %u, off = %u\n",
num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
le32_to_cpu(alloc->id1.bitmap1.i_total),
OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
+ status = -EINVAL;
+ goto bail;
+ }
+
osb->local_alloc_bh = alloc_bh;
osb->local_alloc_state = OCFS2_LA_ENABLED;
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 124471d26a73..c1a83c58456e 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -156,18 +156,14 @@ out:
}
/*
- * lock allocators, and reserving appropriate number of bits for
- * meta blocks and data clusters.
- *
- * in some cases, we don't need to reserve clusters, just let data_ac
- * be NULL.
+ * lock allocator, and reserve appropriate number of bits for
+ * meta blocks.
*/
-static int ocfs2_lock_allocators_move_extents(struct inode *inode,
+static int ocfs2_lock_meta_allocator_move_extents(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 clusters_to_move,
u32 extents_to_split,
struct ocfs2_alloc_context **meta_ac,
- struct ocfs2_alloc_context **data_ac,
int extra_blocks,
int *credits)
{
@@ -192,13 +188,6 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode,
goto out;
}
- if (data_ac) {
- ret = ocfs2_reserve_clusters(osb, clusters_to_move, data_ac);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
- }
*credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el);
@@ -260,10 +249,10 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
}
}
- ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1,
- &context->meta_ac,
- &context->data_ac,
- extra_blocks, &credits);
+ ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et,
+ *len, 1,
+ &context->meta_ac,
+ extra_blocks, &credits);
if (ret) {
mlog_errno(ret);
goto out;
@@ -286,6 +275,21 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
}
}
+ /*
+ * Make sure ocfs2_reserve_cluster is called after
+ * __ocfs2_flush_truncate_log, otherwise, dead lock may happen.
+ *
+ * If ocfs2_reserve_cluster is called
+ * before __ocfs2_flush_truncate_log, dead lock on global bitmap
+ * may happen.
+ *
+ */
+ ret = ocfs2_reserve_clusters(osb, *len, &context->data_ac);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock_mutex;
+ }
+
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
@@ -606,9 +610,10 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
}
}
- ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1,
- &context->meta_ac,
- NULL, extra_blocks, &credits);
+ ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et,
+ len, 1,
+ &context->meta_ac,
+ extra_blocks, &credits);
if (ret) {
mlog_errno(ret);
goto out;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index cb71cbae606d..60cbaa821164 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -333,7 +333,7 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
#ifdef CONFIG_SECCOMP
seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode);
#endif
- seq_printf(m, "\nSpeculation_Store_Bypass:\t");
+ seq_printf(m, "Speculation_Store_Bypass:\t");
switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
case -EINVAL:
seq_printf(m, "unknown");
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4beed301e224..bd8c26a409a7 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -254,7 +254,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
* Inherently racy -- command line shares address space
* with code and data.
*/
- rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0);
+ rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON);
if (rv <= 0)
goto out_free_page;
@@ -272,7 +272,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
int nr_read;
_count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
+ nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
if (nr_read < 0)
rv = nr_read;
if (nr_read <= 0)
@@ -307,7 +307,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
bool final;
_count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
+ nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
if (nr_read < 0)
rv = nr_read;
if (nr_read <= 0)
@@ -356,7 +356,7 @@ skip_argv:
bool final;
_count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
+ nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
if (nr_read < 0)
rv = nr_read;
if (nr_read <= 0)
@@ -868,6 +868,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
unsigned long addr = *ppos;
ssize_t copied;
char *page;
+ unsigned int flags;
if (!mm)
return 0;
@@ -880,6 +881,11 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
if (!atomic_inc_not_zero(&mm->mm_users))
goto free;
+ /* Maybe we should limit FOLL_FORCE to actual ptrace users? */
+ flags = FOLL_FORCE;
+ if (write)
+ flags |= FOLL_WRITE;
+
while (count > 0) {
int this_len = min_t(int, count, PAGE_SIZE);
@@ -888,7 +894,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
break;
}
- this_len = access_remote_vm(mm, addr, page, this_len, write);
+ this_len = access_remote_vm(mm, addr, page, this_len, flags);
if (!this_len) {
if (!copied)
copied = -EIO;
@@ -1000,8 +1006,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
max_len = min_t(size_t, PAGE_SIZE, count);
this_len = min(max_len, this_len);
- retval = access_remote_vm(mm, (env_start + src),
- page, this_len, 0);
+ retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON);
if (retval <= 0) {
ret = retval;
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 588461bb2dd4..e97e7d74e134 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -392,8 +392,8 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
} else {
spin_lock_irqsave(&psinfo->buf_lock, flags);
}
- memcpy(psinfo->buf, s, c);
- psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, 0, c, psinfo);
+ psinfo->write_buf(PSTORE_TYPE_CONSOLE, 0, &id, 0,
+ s, 0, c, psinfo);
spin_unlock_irqrestore(&psinfo->buf_lock, flags);
s += c;
c = e - s;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index bd21795ce657..679d75a864d0 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -445,6 +445,11 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
sig ^= PERSISTENT_RAM_SIG;
if (prz->buffer->sig == sig) {
+ if (buffer_size(prz) == 0) {
+ pr_debug("found existing empty buffer\n");
+ return 0;
+ }
+
if (buffer_size(prz) > prz->buffer_size ||
buffer_start(prz) > buffer_size(prz))
pr_info("found existing invalid buffer, size %zu, start %zu\n",
diff --git a/fs/read_write.c b/fs/read_write.c
index bfd1a5dddf6e..16e554ba885d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -363,8 +363,10 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
iter->type |= WRITE;
ret = file->f_op->write_iter(&kiocb, iter);
BUG_ON(ret == -EIOCBQUEUED);
- if (ret > 0)
+ if (ret > 0) {
*ppos = kiocb.ki_pos;
+ fsnotify_modify(file);
+ }
return ret;
}
EXPORT_SYMBOL(vfs_iter_write);
diff --git a/fs/super.c b/fs/super.c
index 09b526a50986..b9cd7982f6e2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -118,13 +118,23 @@ static unsigned long super_cache_count(struct shrinker *shrink,
sb = container_of(shrink, struct super_block, s_shrink);
/*
- * Don't call trylock_super as it is a potential
- * scalability bottleneck. The counts could get updated
- * between super_cache_count and super_cache_scan anyway.
- * Call to super_cache_count with shrinker_rwsem held
- * ensures the safety of call to list_lru_shrink_count() and
- * s_op->nr_cached_objects().
+ * We don't call trylock_super() here as it is a scalability bottleneck,
+ * so we're exposed to partial setup state. The shrinker rwsem does not
+ * protect filesystem operations backing list_lru_shrink_count() or
+ * s_op->nr_cached_objects(). Counts can change between
+ * super_cache_count and super_cache_scan, so we really don't need locks
+ * here.
+ *
+ * However, if we are currently mounting the superblock, the underlying
+ * filesystem might be in a state of partial construction and hence it
+ * is dangerous to access it. trylock_super() uses a MS_BORN check to
+ * avoid this situation, so do the same here. The memory barrier is
+ * matched with the one in mount_fs() as we don't hold locks here.
*/
+ if (!(sb->s_flags & MS_BORN))
+ return 0;
+ smp_rmb();
+
if (sb->s_op && sb->s_op->nr_cached_objects)
total_objects = sb->s_op->nr_cached_objects(sb, sc);
@@ -1133,6 +1143,14 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
sb = root->d_sb;
BUG_ON(!sb);
WARN_ON(!sb->s_bdi);
+
+ /*
+ * Write barrier is for super_cache_count(). We place it before setting
+ * MS_BORN as the data dependency between the two functions is the
+ * superblock structure contents that we just set up, not the MS_BORN
+ * flag.
+ */
+ smp_wmb();
sb->s_flags |= MS_BORN;
error = security_sb_kern_mount(sb, flags, secdata);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 02fa1dcc5969..29f5b2e589a1 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -275,7 +275,7 @@ static int __sysv_write_inode(struct inode *inode, int wait)
}
}
brelse(bh);
- return 0;
+ return err;
}
int sysv_write_inode(struct inode *inode, struct writeback_control *wbc)
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 0e659d9c69a1..613193c6bb42 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1364,6 +1364,12 @@ reread:
iinfo->i_alloc_type = le16_to_cpu(fe->icbTag.flags) &
ICBTAG_FLAG_AD_MASK;
+ if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_SHORT &&
+ iinfo->i_alloc_type != ICBTAG_FLAG_AD_LONG &&
+ iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
+ ret = -EIO;
+ goto out;
+ }
iinfo->i_unique = 0;
iinfo->i_lenEAttr = 0;
iinfo->i_lenExtents = 0;
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index fb9636cc927c..5d8d12746e6e 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -528,7 +528,14 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
if (args->flags & ATTR_CREATE)
return retval;
retval = xfs_attr_shortform_remove(args);
- ASSERT(retval == 0);
+ if (retval)
+ return retval;
+ /*
+ * Since we have removed the old attr, clear ATTR_REPLACE so
+ * that the leaf format add routine won't trip over the attr
+ * not being around.
+ */
+ args->flags &= ~ATTR_REPLACE;
}
if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||