summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorPhilippe Schenker <philippe.schenker@toradex.com>2019-05-08 16:08:30 +0200
committerPhilippe Schenker <philippe.schenker@toradex.com>2019-05-08 16:08:30 +0200
commit774f42075a4800fe4106dffca804e3207bc3c2e7 (patch)
tree4bb3e8ca84f7f23ac41966017e81ffe09e98412e /fs
parent95a0514c2e68a035e2adbdb5a297da310dccb09c (diff)
parent33e2f28596a7059dbe31d837caae924a74aa36d8 (diff)
Merge branch 'linux-4.14.y_for_4.14-2.0.x-imx' into 4.14-2.0.x-imx
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c21
-rw-r--r--fs/9p/v9fs.h1
-rw-r--r--fs/9p/v9fs_vfs.h23
-rw-r--r--fs/9p/vfs_dir.c8
-rw-r--r--fs/9p/vfs_file.c12
-rw-r--r--fs/9p/vfs_inode.c23
-rw-r--r--fs/9p/vfs_inode_dotl.c27
-rw-r--r--fs/9p/vfs_super.c4
-rw-r--r--fs/autofs4/expire.c3
-rw-r--r--fs/autofs4/inode.c4
-rw-r--r--fs/block_dev.c8
-rw-r--r--fs/btrfs/acl.c9
-rw-r--r--fs/btrfs/extent_io.c4
-rw-r--r--fs/btrfs/ioctl.c10
-rw-r--r--fs/btrfs/props.c8
-rw-r--r--fs/btrfs/raid56.c3
-rw-r--r--fs/btrfs/tree-log.c24
-rw-r--r--fs/btrfs/volumes.c4
-rw-r--r--fs/buffer.c26
-rw-r--r--fs/ceph/dir.c6
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/mds_client.c70
-rw-r--r--fs/ceph/snap.c10
-rw-r--r--fs/cifs/cifs_dfs_ref.c4
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/connect.c5
-rw-r--r--fs/cifs/file.c64
-rw-r--r--fs/cifs/inode.c71
-rw-r--r--fs/cifs/misc.c25
-rw-r--r--fs/cifs/readdir.c9
-rw-r--r--fs/cifs/smb1ops.c2
-rw-r--r--fs/cifs/smb2file.c4
-rw-r--r--fs/cifs/smb2maperror.c3
-rw-r--r--fs/cifs/smb2misc.c23
-rw-r--r--fs/cifs/smb2ops.c15
-rw-r--r--fs/cifs/smb2pdu.h4
-rw-r--r--fs/debugfs/inode.c20
-rw-r--r--fs/devpts/inode.c1
-rw-r--r--fs/direct-io.c29
-rw-r--r--fs/dlm/ast.c10
-rw-r--r--fs/drop_caches.c8
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/exec.c2
-rw-r--r--fs/ext2/super.c39
-rw-r--r--fs/ext4/ext4.h3
-rw-r--r--fs/ext4/ext4_jbd2.h2
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/ext4/fsync.c13
-rw-r--r--fs/ext4/indirect.c43
-rw-r--r--fs/ext4/ioctl.c13
-rw-r--r--fs/ext4/resize.c20
-rw-r--r--fs/ext4/xattr.c3
-rw-r--r--fs/f2fs/acl.c14
-rw-r--r--fs/f2fs/data.c12
-rw-r--r--fs/f2fs/f2fs.h11
-rw-r--r--fs/f2fs/file.c3
-rw-r--r--fs/f2fs/inline.c8
-rw-r--r--fs/f2fs/shrinker.c2
-rw-r--r--fs/f2fs/super.c34
-rw-r--r--fs/f2fs/trace.c8
-rw-r--r--fs/file.c1
-rw-r--r--fs/fs-writeback.c40
-rw-r--r--fs/fuse/dev.c16
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/gfs2/glock.c2
-rw-r--r--fs/hugetlbfs/inode.c32
-rw-r--r--fs/iomap.c12
-rw-r--r--fs/jbd2/commit.c6
-rw-r--r--fs/jbd2/journal.c52
-rw-r--r--fs/jbd2/transaction.c33
-rw-r--r--fs/jffs2/readinode.c5
-rw-r--r--fs/jffs2/super.c5
-rw-r--r--fs/kernfs/mount.c8
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/nfs4idmap.c31
-rw-r--r--fs/nfs/nfs4proc.c15
-rw-r--r--fs/nfs/pagelist.c29
-rw-r--r--fs/nfs/super.c11
-rw-r--r--fs/nfs/write.c11
-rw-r--r--fs/nfsd/nfs3proc.c16
-rw-r--r--fs/nfsd/nfs3xdr.c1
-rw-r--r--fs/nfsd/nfs4callback.c8
-rw-r--r--fs/nfsd/nfs4state.c57
-rw-r--r--fs/nfsd/nfsctl.c2
-rw-r--r--fs/nfsd/state.h3
-rw-r--r--fs/nfsd/xdr4.h13
-rw-r--r--fs/ocfs2/Makefile2
-rw-r--r--fs/ocfs2/buffer_head_io.c2
-rw-r--r--fs/ocfs2/cluster/nodemanager.c14
-rw-r--r--fs/ocfs2/dlm/Makefile2
-rw-r--r--fs/ocfs2/dlmfs/Makefile2
-rw-r--r--fs/ocfs2/refcounttree.c42
-rw-r--r--fs/open.c6
-rw-r--r--fs/pipe.c18
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/proc/proc_sysctl.c7
-rw-r--r--fs/proc/task_mmu.c40
-rw-r--r--fs/read_write.c6
-rw-r--r--fs/splice.c20
-rw-r--r--fs/udf/inode.c6
-rw-r--r--fs/udf/truncate.c3
-rw-r--r--fs/userfaultfd.c9
-rw-r--r--fs/xfs/libxfs/xfs_attr.c20
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c9
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h3
-rw-r--r--fs/xfs/libxfs/xfs_defer.c39
-rw-r--r--fs/xfs/libxfs/xfs_defer.h5
107 files changed, 1130 insertions, 385 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 8fb89ddc6cc7..c52f10efdc9c 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -61,6 +61,8 @@ enum {
Opt_cache_loose, Opt_fscache, Opt_mmap,
/* Access options */
Opt_access, Opt_posixacl,
+ /* Lock timeout option */
+ Opt_locktimeout,
/* Error token */
Opt_err
};
@@ -80,6 +82,7 @@ static const match_table_t tokens = {
{Opt_cachetag, "cachetag=%s"},
{Opt_access, "access=%s"},
{Opt_posixacl, "posixacl"},
+ {Opt_locktimeout, "locktimeout=%u"},
{Opt_err, NULL}
};
@@ -187,6 +190,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
#ifdef CONFIG_9P_FSCACHE
v9ses->cachetag = NULL;
#endif
+ v9ses->session_lock_timeout = P9_LOCK_TIMEOUT;
if (!opts)
return 0;
@@ -360,6 +364,23 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
#endif
break;
+ case Opt_locktimeout:
+ r = match_int(&args[0], &option);
+ if (r < 0) {
+ p9_debug(P9_DEBUG_ERROR,
+ "integer field, but no integer?\n");
+ ret = r;
+ continue;
+ }
+ if (option < 1) {
+ p9_debug(P9_DEBUG_ERROR,
+ "locktimeout must be a greater than zero integer.\n");
+ ret = -EINVAL;
+ continue;
+ }
+ v9ses->session_lock_timeout = (long)option * HZ;
+ break;
+
default:
continue;
}
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 982e017acadb..129e5243a6bf 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -116,6 +116,7 @@ struct v9fs_session_info {
struct p9_client *clnt; /* 9p client */
struct list_head slist; /* list of sessions registered with v9fs */
struct rw_semaphore rename_sem;
+ long session_lock_timeout; /* retry interval for blocking locks */
};
/* cache_validity flags */
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 5a0db6dec8d1..aaee1e6584e6 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -40,6 +40,9 @@
*/
#define P9_LOCK_TIMEOUT (30*HZ)
+/* flags for v9fs_stat2inode() & v9fs_stat2inode_dotl() */
+#define V9FS_STAT2INODE_KEEP_ISIZE 1
+
extern struct file_system_type v9fs_fs_type;
extern const struct address_space_operations v9fs_addr_operations;
extern const struct file_operations v9fs_file_operations;
@@ -61,8 +64,10 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
struct inode *inode, umode_t mode, dev_t);
void v9fs_evict_inode(struct inode *inode);
ino_t v9fs_qid2ino(struct p9_qid *qid);
-void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
-void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *);
+void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
+ struct super_block *sb, unsigned int flags);
+void v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
+ unsigned int flags);
int v9fs_dir_release(struct inode *inode, struct file *filp);
int v9fs_file_open(struct inode *inode, struct file *file);
void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
@@ -83,4 +88,18 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode)
}
int v9fs_open_to_dotl_flags(int flags);
+
+static inline void v9fs_i_size_write(struct inode *inode, loff_t i_size)
+{
+ /*
+ * 32-bit need the lock, concurrent updates could break the
+ * sequences and make i_size_read() loop forever.
+ * 64-bit updates are atomic and can skip the locking.
+ */
+ if (sizeof(i_size) > sizeof(long))
+ spin_lock(&inode->i_lock);
+ i_size_write(inode, i_size);
+ if (sizeof(i_size) > sizeof(long))
+ spin_unlock(&inode->i_lock);
+}
#endif
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 48db9a9f13f9..cb6c4031af55 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -105,7 +105,6 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
int err = 0;
struct p9_fid *fid;
int buflen;
- int reclen = 0;
struct p9_rdir *rdir;
struct kvec kvec;
@@ -138,11 +137,10 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
while (rdir->head < rdir->tail) {
err = p9stat_read(fid->clnt, rdir->buf + rdir->head,
rdir->tail - rdir->head, &st);
- if (err) {
+ if (err <= 0) {
p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
return -EIO;
}
- reclen = st.size+2;
over = !dir_emit(ctx, st.name, strlen(st.name),
v9fs_qid2ino(&st.qid), dt_type(&st));
@@ -150,8 +148,8 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
if (over)
return 0;
- rdir->head += reclen;
- ctx->pos += reclen;
+ rdir->head += err;
+ ctx->pos += err;
}
}
}
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 3a2f37ad1f89..89e69904976a 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -154,6 +154,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
uint8_t status = P9_LOCK_ERROR;
int res = 0;
unsigned char fl_type;
+ struct v9fs_session_info *v9ses;
fid = filp->private_data;
BUG_ON(fid == NULL);
@@ -189,6 +190,8 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
if (IS_SETLKW(cmd))
flock.flags = P9_LOCK_FLAGS_BLOCK;
+ v9ses = v9fs_inode2v9ses(file_inode(filp));
+
/*
* if its a blocked request and we get P9_LOCK_BLOCKED as the status
* for lock request, keep on trying
@@ -202,7 +205,8 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
break;
if (status == P9_LOCK_BLOCKED && !IS_SETLKW(cmd))
break;
- if (schedule_timeout_interruptible(P9_LOCK_TIMEOUT) != 0)
+ if (schedule_timeout_interruptible(v9ses->session_lock_timeout)
+ != 0)
break;
/*
* p9_client_lock_dotl overwrites flock.client_id with the
@@ -442,7 +446,11 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
i_size = i_size_read(inode);
if (iocb->ki_pos > i_size) {
inode_add_bytes(inode, iocb->ki_pos - i_size);
- i_size_write(inode, iocb->ki_pos);
+ /*
+ * Need to serialize against i_size_write() in
+ * v9fs_stat2inode()
+ */
+ v9fs_i_size_write(inode, iocb->ki_pos);
}
return retval;
}
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index bdabb2765d1b..e88cb25176dc 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -538,7 +538,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
if (retval)
goto error;
- v9fs_stat2inode(st, inode, sb);
+ v9fs_stat2inode(st, inode, sb, 0);
v9fs_cache_inode_get_cookie(inode);
unlock_new_inode(inode);
return inode;
@@ -1080,7 +1080,7 @@ v9fs_vfs_getattr(const struct path *path, struct kstat *stat,
if (IS_ERR(st))
return PTR_ERR(st);
- v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb);
+ v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb, 0);
generic_fillattr(d_inode(dentry), stat);
p9stat_free(st);
@@ -1158,12 +1158,13 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
* @stat: Plan 9 metadata (mistat) structure
* @inode: inode to populate
* @sb: superblock of filesystem
+ * @flags: control flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE)
*
*/
void
v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
- struct super_block *sb)
+ struct super_block *sb, unsigned int flags)
{
umode_t mode;
char ext[32];
@@ -1204,10 +1205,11 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
mode = p9mode2perm(v9ses, stat);
mode |= inode->i_mode & ~S_IALLUGO;
inode->i_mode = mode;
- i_size_write(inode, stat->length);
+ if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE))
+ v9fs_i_size_write(inode, stat->length);
/* not real number of blocks, but 512 byte ones ... */
- inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
+ inode->i_blocks = (stat->length + 512 - 1) >> 9;
v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
}
@@ -1404,9 +1406,9 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
{
int umode;
dev_t rdev;
- loff_t i_size;
struct p9_wstat *st;
struct v9fs_session_info *v9ses;
+ unsigned int flags;
v9ses = v9fs_inode2v9ses(inode);
st = p9_client_stat(fid);
@@ -1419,16 +1421,13 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
goto out;
- spin_lock(&inode->i_lock);
/*
* We don't want to refresh inode->i_size,
* because we may have cached data
*/
- i_size = inode->i_size;
- v9fs_stat2inode(st, inode, inode->i_sb);
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
- inode->i_size = i_size;
- spin_unlock(&inode->i_lock);
+ flags = (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) ?
+ V9FS_STAT2INODE_KEEP_ISIZE : 0;
+ v9fs_stat2inode(st, inode, inode->i_sb, flags);
out:
p9stat_free(st);
kfree(st);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 7f6ae21a27b3..3446ab1f44e7 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -143,7 +143,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
if (retval)
goto error;
- v9fs_stat2inode_dotl(st, inode);
+ v9fs_stat2inode_dotl(st, inode, 0);
v9fs_cache_inode_get_cookie(inode);
retval = v9fs_get_acl(inode, fid);
if (retval)
@@ -497,7 +497,7 @@ v9fs_vfs_getattr_dotl(const struct path *path, struct kstat *stat,
if (IS_ERR(st))
return PTR_ERR(st);
- v9fs_stat2inode_dotl(st, d_inode(dentry));
+ v9fs_stat2inode_dotl(st, d_inode(dentry), 0);
generic_fillattr(d_inode(dentry), stat);
/* Change block size to what the server returned */
stat->blksize = st->st_blksize;
@@ -608,11 +608,13 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
* v9fs_stat2inode_dotl - populate an inode structure with stat info
* @stat: stat structure
* @inode: inode to populate
+ * @flags: ctrl flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE)
*
*/
void
-v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
+v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
+ unsigned int flags)
{
umode_t mode;
struct v9fs_inode *v9inode = V9FS_I(inode);
@@ -632,7 +634,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
mode |= inode->i_mode & ~S_IALLUGO;
inode->i_mode = mode;
- i_size_write(inode, stat->st_size);
+ if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE))
+ v9fs_i_size_write(inode, stat->st_size);
inode->i_blocks = stat->st_blocks;
} else {
if (stat->st_result_mask & P9_STATS_ATIME) {
@@ -662,8 +665,9 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
}
if (stat->st_result_mask & P9_STATS_RDEV)
inode->i_rdev = new_decode_dev(stat->st_rdev);
- if (stat->st_result_mask & P9_STATS_SIZE)
- i_size_write(inode, stat->st_size);
+ if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) &&
+ stat->st_result_mask & P9_STATS_SIZE)
+ v9fs_i_size_write(inode, stat->st_size);
if (stat->st_result_mask & P9_STATS_BLOCKS)
inode->i_blocks = stat->st_blocks;
}
@@ -929,9 +933,9 @@ v9fs_vfs_get_link_dotl(struct dentry *dentry,
int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
{
- loff_t i_size;
struct p9_stat_dotl *st;
struct v9fs_session_info *v9ses;
+ unsigned int flags;
v9ses = v9fs_inode2v9ses(inode);
st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
@@ -943,16 +947,13 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
goto out;
- spin_lock(&inode->i_lock);
/*
* We don't want to refresh inode->i_size,
* because we may have cached data
*/
- i_size = inode->i_size;
- v9fs_stat2inode_dotl(st, inode);
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
- inode->i_size = i_size;
- spin_unlock(&inode->i_lock);
+ flags = (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) ?
+ V9FS_STAT2INODE_KEEP_ISIZE : 0;
+ v9fs_stat2inode_dotl(st, inode, flags);
out:
kfree(st);
return 0;
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 8b75463cb211..d4400779f6d9 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -172,7 +172,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
goto release_sb;
}
d_inode(root)->i_ino = v9fs_qid2ino(&st->qid);
- v9fs_stat2inode_dotl(st, d_inode(root));
+ v9fs_stat2inode_dotl(st, d_inode(root), 0);
kfree(st);
} else {
struct p9_wstat *st = NULL;
@@ -183,7 +183,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
}
d_inode(root)->i_ino = v9fs_qid2ino(&st->qid);
- v9fs_stat2inode(st, d_inode(root), sb);
+ v9fs_stat2inode(st, d_inode(root), sb, 0);
p9stat_free(st);
kfree(st);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 57725d4a8c59..141f9bc213a3 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -567,7 +567,6 @@ int autofs4_expire_run(struct super_block *sb,
pkt.len = dentry->d_name.len;
memcpy(pkt.name, dentry->d_name.name, pkt.len);
pkt.name[pkt.len] = '\0';
- dput(dentry);
if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)))
ret = -EFAULT;
@@ -580,6 +579,8 @@ int autofs4_expire_run(struct super_block *sb,
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
+ dput(dentry);
+
return ret;
}
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 3c7e727612fa..e455388a939c 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -259,8 +259,10 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
}
root_inode = autofs4_get_inode(s, S_IFDIR | 0755);
root = d_make_root(root_inode);
- if (!root)
+ if (!root) {
+ ret = -ENOMEM;
goto fail_ino;
+ }
pipe = NULL;
root->d_fsdata = ino;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 23e62bad0b75..b4b462d289f3 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -306,10 +306,10 @@ static void blkdev_bio_end_io(struct bio *bio)
struct blkdev_dio *dio = bio->bi_private;
bool should_dirty = dio->should_dirty;
- if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
- if (bio->bi_status && !dio->bio.bi_status)
- dio->bio.bi_status = bio->bi_status;
- } else {
+ if (bio->bi_status && !dio->bio.bi_status)
+ dio->bio.bi_status = bio->bi_status;
+
+ if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
if (!dio->is_sync) {
struct kiocb *iocb = dio->iocb;
ssize_t ret;
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 1ba49ebe67da..1c42d9f1d6c8 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -22,6 +22,7 @@
#include <linux/posix_acl_xattr.h>
#include <linux/posix_acl.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/slab.h>
#include "ctree.h"
@@ -89,8 +90,16 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
}
if (acl) {
+ unsigned int nofs_flag;
+
size = posix_acl_xattr_size(acl->a_count);
+ /*
+ * We're holding a transaction handle, so use a NOFS memory
+ * allocation context to avoid deadlock if reclaim happens.
+ */
+ nofs_flag = memalloc_nofs_save();
value = kmalloc(size, GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flag);
if (!value) {
ret = -ENOMEM;
goto out;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 5b62e06567a3..4cc534584665 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3014,11 +3014,11 @@ static int __do_readpage(struct extent_io_tree *tree,
*/
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
prev_em_start && *prev_em_start != (u64)-1 &&
- *prev_em_start != em->orig_start)
+ *prev_em_start != em->start)
force_bio_submit = true;
if (prev_em_start)
- *prev_em_start = em->orig_start;
+ *prev_em_start = em->start;
free_extent_map(em);
em = NULL;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cddd63b9103f..dd3b4820ac30 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -357,6 +357,16 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ /*
+ * If the fs is mounted with nologreplay, which requires it to be
+ * mounted in RO mode as well, we can not allow discard on free space
+ * inside block groups, because log trees refer to extents that are not
+ * pinned in a block group's free space cache (pinning the extents is
+ * precisely the first phase of replaying a log tree).
+ */
+ if (btrfs_test_opt(fs_info, NOLOGREPLAY))
+ return -EROFS;
+
rcu_read_lock();
list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
dev_list) {
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index cbabc6f2b322..266f9069307b 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -386,11 +386,11 @@ int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
static int prop_compression_validate(const char *value, size_t len)
{
- if (!strncmp("lzo", value, len))
+ if (!strncmp("lzo", value, 3))
return 0;
- else if (!strncmp("zlib", value, len))
+ else if (!strncmp("zlib", value, 4))
return 0;
- else if (!strncmp("zstd", value, len))
+ else if (!strncmp("zstd", value, 4))
return 0;
return -EINVAL;
@@ -416,7 +416,7 @@ static int prop_compression_apply(struct inode *inode,
btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
} else if (!strncmp("zlib", value, 4)) {
type = BTRFS_COMPRESS_ZLIB;
- } else if (!strncmp("zstd", value, len)) {
+ } else if (!strncmp("zstd", value, 4)) {
type = BTRFS_COMPRESS_ZSTD;
btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
} else {
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 2e995e565633..1e35a2327478 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -2414,8 +2414,9 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
bitmap_clear(rbio->dbitmap, pagenr, 1);
kunmap(p);
- for (stripe = 0; stripe < rbio->real_stripes; stripe++)
+ for (stripe = 0; stripe < nr_data; stripe++)
kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
+ kunmap(p_page);
}
__free_page(p_page);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 179a383a4aaa..9d72882b0f72 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3422,9 +3422,16 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- /* find the first key from this transaction again */
+ /*
+ * Find the first key from this transaction again. See the note for
+ * log_new_dir_dentries, if we're logging a directory recursively we
+ * won't be holding its i_mutex, which means we can modify the directory
+ * while we're logging it. If we remove an entry between our first
+ * search and this search we'll not find the key again and can just
+ * bail.
+ */
ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
- if (WARN_ON(ret != 0))
+ if (ret != 0)
goto done;
/*
@@ -4501,6 +4508,19 @@ static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode,
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
*size_ret = btrfs_inode_size(path->nodes[0], item);
+ /*
+ * If the in-memory inode's i_size is smaller then the inode
+ * size stored in the btree, return the inode's i_size, so
+ * that we get a correct inode size after replaying the log
+ * when before a power failure we had a shrinking truncate
+ * followed by addition of a new name (rename / new hard link).
+ * Otherwise return the inode size from the btree, to avoid
+ * data loss when replaying a log due to previously doing a
+ * write that expands the inode's size and logging a new name
+ * immediately after.
+ */
+ if (*size_ret > inode->vfs_inode.i_size)
+ *size_ret = inode->vfs_inode.i_size;
}
btrfs_release_path(path);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9663b6aa2a56..38ed8e259e00 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6420,10 +6420,10 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
}
if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
- (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
+ (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) ||
(type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
(type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
- (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) ||
+ (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) ||
((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
num_stripes != 1)) {
btrfs_err(fs_info,
diff --git a/fs/buffer.c b/fs/buffer.c
index b96f3b98a6ef..bdca7b10e239 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -208,6 +208,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
struct buffer_head *head;
struct page *page;
int all_mapped = 1;
+ static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
@@ -235,15 +236,15 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
* file io on the block device and getblk. It gets dealt with
* elsewhere, don't buffer_error if we had some unmapped buffers
*/
- if (all_mapped) {
- printk("__find_get_block_slow() failed. "
- "block=%llu, b_blocknr=%llu\n",
- (unsigned long long)block,
- (unsigned long long)bh->b_blocknr);
- printk("b_state=0x%08lx, b_size=%zu\n",
- bh->b_state, bh->b_size);
- printk("device %pg blocksize: %d\n", bdev,
- 1 << bd_inode->i_blkbits);
+ ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
+ if (all_mapped && __ratelimit(&last_warned)) {
+ printk("__find_get_block_slow() failed. block=%llu, "
+ "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
+ "device %pg blocksize: %d\n",
+ (unsigned long long)block,
+ (unsigned long long)bh->b_blocknr,
+ bh->b_state, bh->b_size, bdev,
+ 1 << bd_inode->i_blkbits);
}
out_unlock:
spin_unlock(&bd_mapping->private_lock);
@@ -3083,6 +3084,13 @@ void guard_bio_eod(int op, struct bio *bio)
/* Uhhuh. We've got a bio that straddles the device size! */
truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9);
+ /*
+ * The bio contains more than one segment which spans EOD, just return
+ * and let IO layer turn it into an EIO
+ */
+ if (truncated_bytes > bvec->bv_len)
+ return;
+
/* Truncate the bio.. */
bio->bi_iter.bi_size -= truncated_bytes;
bvec->bv_len -= truncated_bytes;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 8a5266699b67..56e8fc896f6b 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1454,6 +1454,7 @@ void ceph_dentry_lru_del(struct dentry *dn)
unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
{
struct ceph_inode_info *dci = ceph_inode(dir);
+ unsigned hash;
switch (dci->i_dir_layout.dl_dir_hash) {
case 0: /* for backward compat */
@@ -1461,8 +1462,11 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
return dn->d_name.hash;
default:
- return ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
+ spin_lock(&dn->d_lock);
+ hash = ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
dn->d_name.name, dn->d_name.len);
+ spin_unlock(&dn->d_lock);
+ return hash;
}
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index a1492bdc6d03..f2b722f0df5d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -520,6 +520,7 @@ static void ceph_i_callback(struct rcu_head *head)
struct inode *inode = container_of(head, struct inode, i_rcu);
struct ceph_inode_info *ci = ceph_inode(inode);
+ kfree(ci->i_symlink);
kmem_cache_free(ceph_inode_cachep, ci);
}
@@ -551,7 +552,6 @@ void ceph_destroy_inode(struct inode *inode)
ceph_put_snap_realm(mdsc, realm);
}
- kfree(ci->i_symlink);
while ((n = rb_first(&ci->i_fragtree)) != NULL) {
frag = rb_entry(n, struct ceph_inode_frag, node);
rb_erase(n, &ci->i_fragtree);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a48984dd6426..e1ded4bd6115 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1219,6 +1219,15 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
ci->i_prealloc_cap_flush = NULL;
}
+
+ if (drop &&
+ ci->i_wrbuffer_ref_head == 0 &&
+ ci->i_wr_ref == 0 &&
+ ci->i_dirty_caps == 0 &&
+ ci->i_flushing_caps == 0) {
+ ceph_put_snap_context(ci->i_head_snapc);
+ ci->i_head_snapc = NULL;
+ }
}
spin_unlock(&ci->i_ceph_lock);
while (!list_empty(&to_remove)) {
@@ -1863,10 +1872,39 @@ retry:
return path;
}
+/* Duplicate the dentry->d_name.name safely */
+static int clone_dentry_name(struct dentry *dentry, const char **ppath,
+ int *ppathlen)
+{
+ u32 len;
+ char *name;
+
+retry:
+ len = READ_ONCE(dentry->d_name.len);
+ name = kmalloc(len + 1, GFP_NOFS);
+ if (!name)
+ return -ENOMEM;
+
+ spin_lock(&dentry->d_lock);
+ if (dentry->d_name.len != len) {
+ spin_unlock(&dentry->d_lock);
+ kfree(name);
+ goto retry;
+ }
+ memcpy(name, dentry->d_name.name, len);
+ spin_unlock(&dentry->d_lock);
+
+ name[len] = '\0';
+ *ppath = name;
+ *ppathlen = len;
+ return 0;
+}
+
static int build_dentry_path(struct dentry *dentry, struct inode *dir,
const char **ppath, int *ppathlen, u64 *pino,
- int *pfreepath)
+ bool *pfreepath, bool parent_locked)
{
+ int ret;
char *path;
rcu_read_lock();
@@ -1875,8 +1913,15 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
*pino = ceph_ino(dir);
rcu_read_unlock();
- *ppath = dentry->d_name.name;
- *ppathlen = dentry->d_name.len;
+ if (parent_locked) {
+ *ppath = dentry->d_name.name;
+ *ppathlen = dentry->d_name.len;
+ } else {
+ ret = clone_dentry_name(dentry, ppath, ppathlen);
+ if (ret)
+ return ret;
+ *pfreepath = true;
+ }
return 0;
}
rcu_read_unlock();
@@ -1884,13 +1929,13 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
if (IS_ERR(path))
return PTR_ERR(path);
*ppath = path;
- *pfreepath = 1;
+ *pfreepath = true;
return 0;
}
static int build_inode_path(struct inode *inode,
const char **ppath, int *ppathlen, u64 *pino,
- int *pfreepath)
+ bool *pfreepath)
{
struct dentry *dentry;
char *path;
@@ -1906,7 +1951,7 @@ static int build_inode_path(struct inode *inode,
if (IS_ERR(path))
return PTR_ERR(path);
*ppath = path;
- *pfreepath = 1;
+ *pfreepath = true;
return 0;
}
@@ -1917,7 +1962,7 @@ static int build_inode_path(struct inode *inode,
static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
struct inode *rdiri, const char *rpath,
u64 rino, const char **ppath, int *pathlen,
- u64 *ino, int *freepath)
+ u64 *ino, bool *freepath, bool parent_locked)
{
int r = 0;
@@ -1927,7 +1972,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
ceph_snap(rinode));
} else if (rdentry) {
r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino,
- freepath);
+ freepath, parent_locked);
dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
*ppath);
} else if (rpath || rino) {
@@ -1953,7 +1998,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
const char *path2 = NULL;
u64 ino1 = 0, ino2 = 0;
int pathlen1 = 0, pathlen2 = 0;
- int freepath1 = 0, freepath2 = 0;
+ bool freepath1 = false, freepath2 = false;
int len;
u16 releases;
void *p, *end;
@@ -1961,16 +2006,19 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
ret = set_request_path_attr(req->r_inode, req->r_dentry,
req->r_parent, req->r_path1, req->r_ino1.ino,
- &path1, &pathlen1, &ino1, &freepath1);
+ &path1, &pathlen1, &ino1, &freepath1,
+ test_bit(CEPH_MDS_R_PARENT_LOCKED,
+ &req->r_req_flags));
if (ret < 0) {
msg = ERR_PTR(ret);
goto out;
}
+ /* If r_old_dentry is set, then assume that its parent is locked */
ret = set_request_path_attr(NULL, req->r_old_dentry,
req->r_old_dentry_dir,
req->r_path2, req->r_ino2.ino,
- &path2, &pathlen2, &ino2, &freepath2);
+ &path2, &pathlen2, &ino2, &freepath2, true);
if (ret < 0) {
msg = ERR_PTR(ret);
goto out_free1;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 8a2ca41e4b97..a7e763dac038 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -568,7 +568,12 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
old_snapc = NULL;
update_snapc:
- if (ci->i_head_snapc) {
+ if (ci->i_wrbuffer_ref_head == 0 &&
+ ci->i_wr_ref == 0 &&
+ ci->i_dirty_caps == 0 &&
+ ci->i_flushing_caps == 0) {
+ ci->i_head_snapc = NULL;
+ } else {
ci->i_head_snapc = ceph_get_snap_context(new_snapc);
dout(" new snapc is %p\n", new_snapc);
}
@@ -616,7 +621,8 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
capsnap->size);
spin_lock(&mdsc->snap_flush_lock);
- list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
+ if (list_empty(&ci->i_snap_flush_item))
+ list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
spin_unlock(&mdsc->snap_flush_lock);
return 1; /* caller may want to ceph_flush_snaps */
}
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 6b61df117fd4..563e2f6268c3 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -271,9 +271,9 @@ static void dump_referral(const struct dfs_info3_param *ref)
{
cifs_dbg(FYI, "DFS: ref path: %s\n", ref->path_name);
cifs_dbg(FYI, "DFS: node path: %s\n", ref->node_name);
- cifs_dbg(FYI, "DFS: fl: %hd, srv_type: %hd\n",
+ cifs_dbg(FYI, "DFS: fl: %d, srv_type: %d\n",
ref->flags, ref->server_type);
- cifs_dbg(FYI, "DFS: ref_flags: %hd, path_consumed: %hd\n",
+ cifs_dbg(FYI, "DFS: ref_flags: %d, path_consumed: %d\n",
ref->ref_flag, ref->path_consumed);
}
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index f29cdb1cdeb7..7b7ab10a9db1 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1189,6 +1189,7 @@ cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file)
}
struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file);
+void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_hdlr);
void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
#define CIFS_CACHE_READ_FLG 1
@@ -1693,6 +1694,7 @@ GLOBAL_EXTERN spinlock_t gidsidlock;
#endif /* CONFIG_CIFS_ACL */
void cifs_oplock_break(struct work_struct *work);
+void cifs_queue_oplock_break(struct cifsFileInfo *cfile);
extern const struct slow_work_ops cifs_oplock_break_ops;
extern struct workqueue_struct *cifsiod_wq;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 48aa854c564a..33cd844579ae 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1265,6 +1265,11 @@ cifs_parse_devname(const char *devname, struct smb_vol *vol)
const char *delims = "/\\";
size_t len;
+ if (unlikely(!devname || !*devname)) {
+ cifs_dbg(VFS, "Device name not specified.\n");
+ return -EINVAL;
+ }
+
/* make sure we have a valid UNC double delimiter prefix */
len = strspn(devname, delims);
if (len != 2)
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 1e176e11dbfa..48ea9dfd5f02 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -358,13 +358,31 @@ cifsFileInfo_get(struct cifsFileInfo *cifs_file)
return cifs_file;
}
-/*
- * Release a reference on the file private data. This may involve closing
- * the filehandle out on the server. Must be called without holding
- * tcon->open_file_lock and cifs_file->file_info_lock.
+/**
+ * cifsFileInfo_put - release a reference of file priv data
+ *
+ * Always potentially wait for oplock handler. See _cifsFileInfo_put().
*/
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
+ _cifsFileInfo_put(cifs_file, true);
+}
+
+/**
+ * _cifsFileInfo_put - release a reference of file priv data
+ *
+ * This may involve closing the filehandle @cifs_file out on the
+ * server. Must be called without holding tcon->open_file_lock and
+ * cifs_file->file_info_lock.
+ *
+ * If @wait_for_oplock_handler is true and we are releasing the last
+ * reference, wait for any running oplock break handler of the file
+ * and cancel any pending one. If calling this function from the
+ * oplock break handler, you need to pass false.
+ *
+ */
+void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
+{
struct inode *inode = d_inode(cifs_file->dentry);
struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
@@ -411,7 +429,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
spin_unlock(&tcon->open_file_lock);
- oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
+ oplock_break_cancelled = wait_oplock_handler ?
+ cancel_work_sync(&cifs_file->oplock_break) : false;
if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
struct TCP_Server_Info *server = tcon->ses->server;
@@ -1128,6 +1147,10 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
return -EINVAL;
}
+ BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
+ PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
+ PAGE_SIZE);
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
@@ -1466,6 +1489,10 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
return -EINVAL;
+ BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
+ PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
+ PAGE_SIZE);
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
@@ -1623,8 +1650,20 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
rc = server->ops->mand_unlock_range(cfile, flock, xid);
out:
- if (flock->fl_flags & FL_POSIX && !rc)
+ if (flock->fl_flags & FL_POSIX) {
+ /*
+ * If this is a request to remove all locks because we
+ * are closing the file, it doesn't matter if the
+ * unlocking failed as both cifs.ko and the SMB server
+ * remove the lock on file close
+ */
+ if (rc) {
+ cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
+ if (!(flock->fl_flags & FL_CLOSE))
+ return rc;
+ }
rc = locks_lock_file_wait(file, flock);
+ }
return rc;
}
@@ -2881,14 +2920,16 @@ cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
* these pages but not on the region from pos to ppos+len-1.
*/
written = cifs_user_writev(iocb, from);
- if (written > 0 && CIFS_CACHE_READ(cinode)) {
+ if (CIFS_CACHE_READ(cinode)) {
/*
- * Windows 7 server can delay breaking level2 oplock if a write
- * request comes - break it on the client to prevent reading
- * an old data.
+ * We have read level caching and we have just sent a write
+ * request to the server thus making data in the cache stale.
+ * Zap the cache and set oplock/lease level to NONE to avoid
+ * reading stale data from the cache. All subsequent read
+ * operations will read new data from the server.
*/
cifs_zap_mapping(inode);
- cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
+ cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
inode);
cinode->oplock = 0;
}
@@ -4114,6 +4155,7 @@ void cifs_oplock_break(struct work_struct *work)
cinode);
cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
}
+ _cifsFileInfo_put(cfile, false /* do not wait for ourself */);
cifs_done_oplock_break(cinode);
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a90a637ae79a..e7192ee7a89c 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -779,43 +779,50 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
} else if ((rc == -EACCES) && backup_cred(cifs_sb) &&
(strcmp(server->vals->version_string, SMB1_VERSION_STRING)
== 0)) {
- /*
- * For SMB2 and later the backup intent flag is already
- * sent if needed on open and there is no path based
- * FindFirst operation to use to retry with
- */
+ /*
+ * For SMB2 and later the backup intent flag is already
+ * sent if needed on open and there is no path based
+ * FindFirst operation to use to retry with
+ */
- srchinf = kzalloc(sizeof(struct cifs_search_info),
- GFP_KERNEL);
- if (srchinf == NULL) {
- rc = -ENOMEM;
- goto cgii_exit;
- }
+ srchinf = kzalloc(sizeof(struct cifs_search_info),
+ GFP_KERNEL);
+ if (srchinf == NULL) {
+ rc = -ENOMEM;
+ goto cgii_exit;
+ }
- srchinf->endOfSearch = false;
+ srchinf->endOfSearch = false;
+ if (tcon->unix_ext)
+ srchinf->info_level = SMB_FIND_FILE_UNIX;
+ else if ((tcon->ses->capabilities &
+ tcon->ses->server->vals->cap_nt_find) == 0)
+ srchinf->info_level = SMB_FIND_FILE_INFO_STANDARD;
+ else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
srchinf->info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO;
+ else /* no srvino useful for fallback to some netapp */
+ srchinf->info_level = SMB_FIND_FILE_DIRECTORY_INFO;
- srchflgs = CIFS_SEARCH_CLOSE_ALWAYS |
- CIFS_SEARCH_CLOSE_AT_END |
- CIFS_SEARCH_BACKUP_SEARCH;
+ srchflgs = CIFS_SEARCH_CLOSE_ALWAYS |
+ CIFS_SEARCH_CLOSE_AT_END |
+ CIFS_SEARCH_BACKUP_SEARCH;
- rc = CIFSFindFirst(xid, tcon, full_path,
- cifs_sb, NULL, srchflgs, srchinf, false);
- if (!rc) {
- data =
- (FILE_ALL_INFO *)srchinf->srch_entries_start;
+ rc = CIFSFindFirst(xid, tcon, full_path,
+ cifs_sb, NULL, srchflgs, srchinf, false);
+ if (!rc) {
+ data = (FILE_ALL_INFO *)srchinf->srch_entries_start;
- cifs_dir_info_to_fattr(&fattr,
- (FILE_DIRECTORY_INFO *)data, cifs_sb);
- fattr.cf_uniqueid = le64_to_cpu(
- ((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId);
- validinum = true;
+ cifs_dir_info_to_fattr(&fattr,
+ (FILE_DIRECTORY_INFO *)data, cifs_sb);
+ fattr.cf_uniqueid = le64_to_cpu(
+ ((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId);
+ validinum = true;
- cifs_buf_release(srchinf->ntwrk_buf_start);
- }
- kfree(srchinf);
- if (rc)
- goto cgii_exit;
+ cifs_buf_release(srchinf->ntwrk_buf_start);
+ }
+ kfree(srchinf);
+ if (rc)
+ goto cgii_exit;
} else
goto cgii_exit;
@@ -1723,6 +1730,10 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry,
if (rc == 0 || rc != -EBUSY)
goto do_rename_exit;
+ /* Don't fall back to using SMB on SMB 2+ mount */
+ if (server->vals->protocol_id != 0)
+ goto do_rename_exit;
+
/* open-file renames don't work across directories */
if (to_dentry->d_parent != from_dentry->d_parent)
goto do_rename_exit;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index bcab30d4a6c7..76f1649ab444 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -486,8 +486,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
&pCifsInode->flags);
- queue_work(cifsoplockd_wq,
- &netfile->oplock_break);
+ cifs_queue_oplock_break(netfile);
netfile->oplock_break_cancelled = false;
spin_unlock(&tcon->open_file_lock);
@@ -584,6 +583,28 @@ void cifs_put_writer(struct cifsInodeInfo *cinode)
spin_unlock(&cinode->writers_lock);
}
+/**
+ * cifs_queue_oplock_break - queue the oplock break handler for cfile
+ *
+ * This function is called from the demultiplex thread when it
+ * receives an oplock break for @cfile.
+ *
+ * Assumes the tcon->open_file_lock is held.
+ * Assumes cfile->file_info_lock is NOT held.
+ */
+void cifs_queue_oplock_break(struct cifsFileInfo *cfile)
+{
+ /*
+ * Bump the handle refcount now while we hold the
+ * open_file_lock to enforce the validity of it for the oplock
+ * break handler. The matching put is done at the end of the
+ * handler.
+ */
+ cifsFileInfo_get(cfile);
+
+ queue_work(cifsoplockd_wq, &cfile->oplock_break);
+}
+
void cifs_done_oplock_break(struct cifsInodeInfo *cinode)
{
clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index ef24b4527459..68183872bf8b 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -655,7 +655,14 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
/* scan and find it */
int i;
char *cur_ent;
- char *end_of_smb = cfile->srch_inf.ntwrk_buf_start +
+ char *end_of_smb;
+
+ if (cfile->srch_inf.ntwrk_buf_start == NULL) {
+ cifs_dbg(VFS, "ntwrk_buf_start is NULL during readdir\n");
+ return -EIO;
+ }
+
+ end_of_smb = cfile->srch_inf.ntwrk_buf_start +
server->ops->calc_smb_size(
cfile->srch_inf.ntwrk_buf_start);
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index d8cd82001c1c..f50d3d0b9b87 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -306,7 +306,7 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr)
remaining = tgt_total_cnt - total_in_tgt;
if (remaining < 0) {
- cifs_dbg(FYI, "Server sent too much data. tgt_total_cnt=%hu total_in_tgt=%hu\n",
+ cifs_dbg(FYI, "Server sent too much data. tgt_total_cnt=%hu total_in_tgt=%u\n",
tgt_total_cnt, total_in_tgt);
return -EPROTO;
}
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 79078533f807..1add404618f0 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -130,6 +130,8 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
if (max_buf < sizeof(struct smb2_lock_element))
return -EINVAL;
+ BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf, PAGE_SIZE);
max_num = max_buf / sizeof(struct smb2_lock_element);
buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf)
@@ -266,6 +268,8 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
return -EINVAL;
}
+ BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf, PAGE_SIZE);
max_num = max_buf / sizeof(struct smb2_lock_element);
buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf) {
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index d7e839cb773f..92c9cdf4704d 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -1035,7 +1035,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_UNFINISHED_CONTEXT_DELETED, -EIO,
"STATUS_UNFINISHED_CONTEXT_DELETED"},
{STATUS_NO_TGT_REPLY, -EIO, "STATUS_NO_TGT_REPLY"},
- {STATUS_OBJECTID_NOT_FOUND, -EIO, "STATUS_OBJECTID_NOT_FOUND"},
+ /* Note that ENOATTTR and ENODATA are the same errno */
+ {STATUS_OBJECTID_NOT_FOUND, -ENODATA, "STATUS_OBJECTID_NOT_FOUND"},
{STATUS_NO_IP_ADDRESSES, -EIO, "STATUS_NO_IP_ADDRESSES"},
{STATUS_WRONG_CREDENTIAL_HANDLE, -EIO,
"STATUS_WRONG_CREDENTIAL_HANDLE"},
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index efdfdb47a7dd..31f01f09d25a 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -479,7 +479,6 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
__u8 lease_state;
struct list_head *tmp;
struct cifsFileInfo *cfile;
- struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_pending_open *open;
struct cifsInodeInfo *cinode;
int ack_req = le32_to_cpu(rsp->Flags &
@@ -499,14 +498,26 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
cifs_dbg(FYI, "lease key match, lease break 0x%x\n",
le32_to_cpu(rsp->NewLeaseState));
- server->ops->set_oplock_level(cinode, lease_state, 0, NULL);
-
if (ack_req)
cfile->oplock_break_cancelled = false;
else
cfile->oplock_break_cancelled = true;
- queue_work(cifsoplockd_wq, &cfile->oplock_break);
+ set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags);
+
+ /*
+ * Set or clear flags depending on the lease state being READ.
+ * HANDLE caching flag should be added when the client starts
+ * to defer closing remote file handles with HANDLE leases.
+ */
+ if (lease_state & SMB2_LEASE_READ_CACHING_HE)
+ set_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
+ &cinode->flags);
+ else
+ clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
+ &cinode->flags);
+
+ cifs_queue_oplock_break(cfile);
kfree(lw);
return true;
}
@@ -650,8 +661,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
&cinode->flags);
spin_unlock(&cfile->file_info_lock);
- queue_work(cifsoplockd_wq,
- &cfile->oplock_break);
+
+ cifs_queue_oplock_break(cfile);
spin_unlock(&tcon->open_file_lock);
spin_unlock(&cifs_tcp_ses_lock);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index fb1c65f93114..418062c7f040 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1933,6 +1933,15 @@ smb2_downgrade_oplock(struct TCP_Server_Info *server,
}
static void
+smb21_downgrade_oplock(struct TCP_Server_Info *server,
+ struct cifsInodeInfo *cinode, bool set_level2)
+{
+ server->ops->set_oplock_level(cinode,
+ set_level2 ? SMB2_LEASE_READ_CACHING_HE :
+ 0, 0, NULL);
+}
+
+static void
smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
unsigned int epoch, bool *purge_cache)
{
@@ -2917,7 +2926,7 @@ struct smb_version_operations smb21_operations = {
.print_stats = smb2_print_stats,
.is_oplock_break = smb2_is_valid_oplock_break,
.handle_cancelled_mid = smb2_handle_cancelled_mid,
- .downgrade_oplock = smb2_downgrade_oplock,
+ .downgrade_oplock = smb21_downgrade_oplock,
.need_neg = smb2_need_neg,
.negotiate = smb2_negotiate,
.negotiate_wsize = smb2_negotiate_wsize,
@@ -3012,7 +3021,7 @@ struct smb_version_operations smb30_operations = {
.dump_share_caps = smb2_dump_share_caps,
.is_oplock_break = smb2_is_valid_oplock_break,
.handle_cancelled_mid = smb2_handle_cancelled_mid,
- .downgrade_oplock = smb2_downgrade_oplock,
+ .downgrade_oplock = smb21_downgrade_oplock,
.need_neg = smb2_need_neg,
.negotiate = smb2_negotiate,
.negotiate_wsize = smb2_negotiate_wsize,
@@ -3117,7 +3126,7 @@ struct smb_version_operations smb311_operations = {
.dump_share_caps = smb2_dump_share_caps,
.is_oplock_break = smb2_is_valid_oplock_break,
.handle_cancelled_mid = smb2_handle_cancelled_mid,
- .downgrade_oplock = smb2_downgrade_oplock,
+ .downgrade_oplock = smb21_downgrade_oplock,
.need_neg = smb2_need_neg,
.negotiate = smb2_negotiate,
.negotiate_wsize = smb2_negotiate_wsize,
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index e52454059725..bad458a2b579 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -84,8 +84,8 @@
#define NUMBER_OF_SMB2_COMMANDS 0x0013
-/* 4 len + 52 transform hdr + 64 hdr + 56 create rsp */
-#define MAX_SMB2_HDR_SIZE 0x00b0
+/* 52 transform hdr + 64 hdr + 88 create rsp */
+#define MAX_SMB2_HDR_SIZE 204
#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index c59f015f386e..f4df6feec271 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -170,19 +170,24 @@ static int debugfs_show_options(struct seq_file *m, struct dentry *root)
return 0;
}
-static void debugfs_evict_inode(struct inode *inode)
+static void debugfs_i_callback(struct rcu_head *head)
{
- truncate_inode_pages_final(&inode->i_data);
- clear_inode(inode);
+ struct inode *inode = container_of(head, struct inode, i_rcu);
if (S_ISLNK(inode->i_mode))
kfree(inode->i_link);
+ free_inode_nonrcu(inode);
+}
+
+static void debugfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, debugfs_i_callback);
}
static const struct super_operations debugfs_super_operations = {
.statfs = simple_statfs,
.remount_fs = debugfs_remount,
.show_options = debugfs_show_options,
- .evict_inode = debugfs_evict_inode,
+ .destroy_inode = debugfs_destroy_inode,
};
static struct vfsmount *debugfs_automount(struct path *path)
@@ -766,6 +771,13 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
struct dentry *dentry = NULL, *trap;
struct name_snapshot old_name;
+ if (IS_ERR(old_dir))
+ return old_dir;
+ if (IS_ERR(new_dir))
+ return new_dir;
+ if (IS_ERR_OR_NULL(old_dentry))
+ return old_dentry;
+
trap = lock_rename(new_dir, old_dir);
/* Source or destination directories don't exist? */
if (d_really_is_negative(old_dir) || d_really_is_negative(new_dir))
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 542364bf923e..32f6f1c683d9 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -439,6 +439,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
s->s_blocksize_bits = 10;
s->s_magic = DEVPTS_SUPER_MAGIC;
s->s_op = &devpts_sops;
+ s->s_d_op = &simple_dentry_operations;
s->s_time_gran = 1;
error = -ENOMEM;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 0a1cba33adaa..2cd493494c3b 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -219,6 +219,27 @@ static inline struct page *dio_get_page(struct dio *dio,
return dio->pages[sdio->head];
}
+/*
+ * Warn about a page cache invalidation failure during a direct io write.
+ */
+void dio_warn_stale_pagecache(struct file *filp)
+{
+ static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST);
+ char pathname[128];
+ struct inode *inode = file_inode(filp);
+ char *path;
+
+ errseq_set(&inode->i_mapping->wb_err, -EIO);
+ if (__ratelimit(&_rs)) {
+ path = file_path(filp, pathname, sizeof(pathname));
+ if (IS_ERR(path))
+ path = "(unknown)";
+ pr_crit("Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!\n");
+ pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid,
+ current->comm);
+ }
+}
+
/**
* dio_complete() - called when all DIO BIO I/O has been completed
* @offset: the byte offset in the file of the completed operation
@@ -290,7 +311,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
err = invalidate_inode_pages2_range(dio->inode->i_mapping,
offset >> PAGE_SHIFT,
(offset + ret - 1) >> PAGE_SHIFT);
- WARN_ON_ONCE(err);
+ if (err)
+ dio_warn_stale_pagecache(dio->iocb->ki_filp);
}
if (!(dio->flags & DIO_SKIP_DIO_COUNT))
@@ -658,6 +680,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
unsigned long fs_count; /* Number of filesystem-sized blocks */
int create;
unsigned int i_blkbits = sdio->blkbits + sdio->blkfactor;
+ loff_t i_size;
/*
* If there was a memory error and we've overwritten all the
@@ -687,8 +710,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
*/
create = dio->op == REQ_OP_WRITE;
if (dio->flags & DIO_SKIP_HOLES) {
- if (fs_startblk <= ((i_size_read(dio->inode) - 1) >>
- i_blkbits))
+ i_size = i_size_read(dio->inode);
+ if (i_size && fs_startblk <= (i_size - 1) >> i_blkbits)
create = 0;
}
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index 07fed838d8fd..15fa4239ae9f 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -290,6 +290,8 @@ void dlm_callback_suspend(struct dlm_ls *ls)
flush_workqueue(ls->ls_callback_wq);
}
+#define MAX_CB_QUEUE 25
+
void dlm_callback_resume(struct dlm_ls *ls)
{
struct dlm_lkb *lkb, *safe;
@@ -300,15 +302,23 @@ void dlm_callback_resume(struct dlm_ls *ls)
if (!ls->ls_callback_wq)
return;
+more:
mutex_lock(&ls->ls_cb_mutex);
list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) {
list_del_init(&lkb->lkb_cb_list);
queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
count++;
+ if (count == MAX_CB_QUEUE)
+ break;
}
mutex_unlock(&ls->ls_cb_mutex);
if (count)
log_rinfo(ls, "dlm_callback_resume %d", count);
+ if (count == MAX_CB_QUEUE) {
+ count = 0;
+ cond_resched();
+ goto more;
+ }
}
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 82377017130f..d31b6c72b476 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -21,8 +21,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
spin_lock(&inode->i_lock);
+ /*
+ * We must skip inodes in unusual state. We may also skip
+ * inodes without pages but we deliberately won't in case
+ * we need to reschedule to avoid softlockups.
+ */
if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
- (inode->i_mapping->nrpages == 0)) {
+ (inode->i_mapping->nrpages == 0 && !need_resched())) {
spin_unlock(&inode->i_lock);
continue;
}
@@ -30,6 +35,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
spin_unlock(&inode->i_lock);
spin_unlock(&sb->s_inode_list_lock);
+ cond_resched();
invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
toput_inode = inode;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 2fabd19cdeea..c291bf61afb9 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1167,7 +1167,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
* semantics). All the events that happen during that period of time are
* chained in ep->ovflist and requeued later on.
*/
- if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
+ if (ep->ovflist != EP_UNACTIVE_PTR) {
if (epi->next == EP_UNACTIVE_PTR) {
epi->next = ep->ovflist;
ep->ovflist = epi;
diff --git a/fs/exec.c b/fs/exec.c
index 0da4d748b4e6..0936b5a8199a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -925,7 +925,7 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size,
bytes = kernel_read(file, *buf + pos, i_size - pos, &pos);
if (bytes < 0) {
ret = bytes;
- goto out;
+ goto out_free;
}
if (bytes == 0)
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 726e680a3368..13f470636672 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -754,7 +754,8 @@ static loff_t ext2_max_size(int bits)
{
loff_t res = EXT2_NDIR_BLOCKS;
int meta_blocks;
- loff_t upper_limit;
+ unsigned int upper_limit;
+ unsigned int ppb = 1 << (bits-2);
/* This is calculated to be the largest file size for a
* dense, file such that the total number of
@@ -768,24 +769,34 @@ static loff_t ext2_max_size(int bits)
/* total blocks in file system block size */
upper_limit >>= (bits - 9);
+ /* Compute how many blocks we can address by block tree */
+ res += 1LL << (bits-2);
+ res += 1LL << (2*(bits-2));
+ res += 1LL << (3*(bits-2));
+ /* Does block tree limit file size? */
+ if (res < upper_limit)
+ goto check_lfs;
+ res = upper_limit;
+ /* How many metadata blocks are needed for addressing upper_limit? */
+ upper_limit -= EXT2_NDIR_BLOCKS;
/* indirect blocks */
meta_blocks = 1;
+ upper_limit -= ppb;
/* double indirect blocks */
- meta_blocks += 1 + (1LL << (bits-2));
- /* tripple indirect blocks */
- meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
-
- upper_limit -= meta_blocks;
- upper_limit <<= bits;
-
- res += 1LL << (bits-2);
- res += 1LL << (2*(bits-2));
- res += 1LL << (3*(bits-2));
+ if (upper_limit < ppb * ppb) {
+ meta_blocks += 1 + DIV_ROUND_UP(upper_limit, ppb);
+ res -= meta_blocks;
+ goto check_lfs;
+ }
+ meta_blocks += 1 + ppb;
+ upper_limit -= ppb * ppb;
+ /* tripple indirect blocks for the rest */
+ meta_blocks += 1 + DIV_ROUND_UP(upper_limit, ppb) +
+ DIV_ROUND_UP(upper_limit, ppb*ppb);
+ res -= meta_blocks;
+check_lfs:
res <<= bits;
- if (res > upper_limit)
- res = upper_limit;
-
if (res > MAX_LFS_FILESIZE)
res = MAX_LFS_FILESIZE;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 02970a2e86a3..95ef26b39e69 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -426,6 +426,9 @@ struct flex_groups {
/* Flags that are appropriate for non-directories/regular files. */
#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
+/* The only flags that should be swapped */
+#define EXT4_FL_SHOULD_SWAP (EXT4_HUGE_FILE_FL | EXT4_EXTENTS_FL)
+
/* Mask out flags that are inappropriate for the given type of inode. */
static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
{
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 48143e32411c..1437f62d068c 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -387,7 +387,7 @@ static inline void ext4_update_inode_fsync_trans(handle_t *handle,
{
struct ext4_inode_info *ei = EXT4_I(inode);
- if (ext4_handle_valid(handle)) {
+ if (ext4_handle_valid(handle) && !is_handle_aborted(handle)) {
ei->i_sync_tid = handle->h_transaction->t_tid;
if (datasync)
ei->i_datasync_tid = handle->h_transaction->t_tid;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 5cb9aa3ad249..1913c69498c1 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -123,7 +123,7 @@ ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
struct super_block *sb = inode->i_sb;
int blockmask = sb->s_blocksize - 1;
- if (pos >= i_size_read(inode))
+ if (pos >= ALIGN(i_size_read(inode), sb->s_blocksize))
return 0;
if ((pos | iov_iter_alignment(from)) & blockmask)
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 712f00995390..5508baa11bb6 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -116,16 +116,8 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out;
}
- ret = file_write_and_wait_range(file, start, end);
- if (ret)
- return ret;
-
if (!journal) {
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL
- };
-
- ret = ext4_write_inode(inode, &wbc);
+ ret = __generic_file_fsync(file, start, end, datasync);
if (!ret)
ret = ext4_sync_parent(inode);
if (test_opt(inode->i_sb, BARRIER))
@@ -133,6 +125,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out;
}
+ ret = file_write_and_wait_range(file, start, end);
+ if (ret)
+ return ret;
/*
* data=writeback,ordered:
* The caller's filemap_fdatawrite()/wait will sync the data.
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index bf7fa1507e81..e1801b288847 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -1219,6 +1219,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
ext4_lblk_t offsets[4], offsets2[4];
Indirect chain[4], chain2[4];
Indirect *partial, *partial2;
+ Indirect *p = NULL, *p2 = NULL;
ext4_lblk_t max_block;
__le32 nr = 0, nr2 = 0;
int n = 0, n2 = 0;
@@ -1260,7 +1261,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
}
- partial = ext4_find_shared(inode, n, offsets, chain, &nr);
+ partial = p = ext4_find_shared(inode, n, offsets, chain, &nr);
if (nr) {
if (partial == chain) {
/* Shared branch grows from the inode */
@@ -1285,13 +1286,11 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
partial->p + 1,
(__le32 *)partial->bh->b_data+addr_per_block,
(chain+n-1) - partial);
- BUFFER_TRACE(partial->bh, "call brelse");
- brelse(partial->bh);
partial--;
}
end_range:
- partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
+ partial2 = p2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
if (nr2) {
if (partial2 == chain2) {
/*
@@ -1321,16 +1320,14 @@ end_range:
(__le32 *)partial2->bh->b_data,
partial2->p,
(chain2+n2-1) - partial2);
- BUFFER_TRACE(partial2->bh, "call brelse");
- brelse(partial2->bh);
partial2--;
}
goto do_indirects;
}
/* Punch happened within the same level (n == n2) */
- partial = ext4_find_shared(inode, n, offsets, chain, &nr);
- partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
+ partial = p = ext4_find_shared(inode, n, offsets, chain, &nr);
+ partial2 = p2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
/* Free top, but only if partial2 isn't its subtree. */
if (nr) {
@@ -1387,11 +1384,7 @@ end_range:
partial->p + 1,
partial2->p,
(chain+n-1) - partial);
- BUFFER_TRACE(partial->bh, "call brelse");
- brelse(partial->bh);
- BUFFER_TRACE(partial2->bh, "call brelse");
- brelse(partial2->bh);
- return 0;
+ goto cleanup;
}
/*
@@ -1406,8 +1399,6 @@ end_range:
partial->p + 1,
(__le32 *)partial->bh->b_data+addr_per_block,
(chain+n-1) - partial);
- BUFFER_TRACE(partial->bh, "call brelse");
- brelse(partial->bh);
partial--;
}
if (partial2 > chain2 && depth2 <= depth) {
@@ -1415,11 +1406,21 @@ end_range:
(__le32 *)partial2->bh->b_data,
partial2->p,
(chain2+n2-1) - partial2);
- BUFFER_TRACE(partial2->bh, "call brelse");
- brelse(partial2->bh);
partial2--;
}
}
+
+cleanup:
+ while (p && p > chain) {
+ BUFFER_TRACE(p->bh, "call brelse");
+ brelse(p->bh);
+ p--;
+ }
+ while (p2 && p2 > chain2) {
+ BUFFER_TRACE(p2->bh, "call brelse");
+ brelse(p2->bh);
+ p2--;
+ }
return 0;
do_indirects:
@@ -1427,7 +1428,7 @@ do_indirects:
switch (offsets[0]) {
default:
if (++n >= n2)
- return 0;
+ break;
nr = i_data[EXT4_IND_BLOCK];
if (nr) {
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
@@ -1435,7 +1436,7 @@ do_indirects:
}
case EXT4_IND_BLOCK:
if (++n >= n2)
- return 0;
+ break;
nr = i_data[EXT4_DIND_BLOCK];
if (nr) {
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
@@ -1443,7 +1444,7 @@ do_indirects:
}
case EXT4_DIND_BLOCK:
if (++n >= n2)
- return 0;
+ break;
nr = i_data[EXT4_TIND_BLOCK];
if (nr) {
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
@@ -1452,5 +1453,5 @@ do_indirects:
case EXT4_TIND_BLOCK:
;
}
- return 0;
+ goto cleanup;
}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index b2a47058e04c..3dbf4e414706 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -61,6 +61,7 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
loff_t isize;
struct ext4_inode_info *ei1;
struct ext4_inode_info *ei2;
+ unsigned long tmp;
ei1 = EXT4_I(inode1);
ei2 = EXT4_I(inode2);
@@ -73,7 +74,10 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
swap(inode1->i_mtime, inode2->i_mtime);
memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data));
- swap(ei1->i_flags, ei2->i_flags);
+ tmp = ei1->i_flags & EXT4_FL_SHOULD_SWAP;
+ ei1->i_flags = (ei2->i_flags & EXT4_FL_SHOULD_SWAP) |
+ (ei1->i_flags & ~EXT4_FL_SHOULD_SWAP);
+ ei2->i_flags = tmp | (ei2->i_flags & ~EXT4_FL_SHOULD_SWAP);
swap(ei1->i_disksize, ei2->i_disksize);
ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
@@ -936,6 +940,13 @@ resizefs_out:
if (!blk_queue_discard(q))
return -EOPNOTSUPP;
+ /*
+ * We haven't replayed the journal, so we cannot use our
+ * block-bitmap-guided storage zapping commands.
+ */
+ if (test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb))
+ return -EROFS;
+
if (copy_from_user(&range, (struct fstrim_range __user *)arg,
sizeof(range)))
return -EFAULT;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 703b516366fd..333fba05e1a5 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -907,11 +907,18 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
memcpy(n_group_desc, o_group_desc,
EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
n_group_desc[gdb_num] = gdb_bh;
+
+ BUFFER_TRACE(gdb_bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, gdb_bh);
+ if (err) {
+ kvfree(n_group_desc);
+ brelse(gdb_bh);
+ return err;
+ }
+
EXT4_SB(sb)->s_group_desc = n_group_desc;
EXT4_SB(sb)->s_gdb_count++;
kvfree(o_group_desc);
- BUFFER_TRACE(gdb_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, gdb_bh);
return err;
}
@@ -1930,7 +1937,8 @@ retry:
le16_to_cpu(es->s_reserved_gdt_blocks);
n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb);
n_blocks_count = (ext4_fsblk_t)n_group *
- EXT4_BLOCKS_PER_GROUP(sb);
+ EXT4_BLOCKS_PER_GROUP(sb) +
+ le32_to_cpu(es->s_first_data_block);
n_group--; /* set to last group number */
}
@@ -2041,6 +2049,10 @@ out:
free_flex_gd(flex_gd);
if (resize_inode != NULL)
iput(resize_inode);
- ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", n_blocks_count);
+ if (err)
+ ext4_warning(sb, "error (%d) occurred during "
+ "file system resize", err);
+ ext4_msg(sb, KERN_INFO, "resized filesystem to %llu",
+ ext4_blocks_count(es));
return err;
}
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 311761a6ef6d..6761e905cab0 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -828,6 +828,7 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage)
bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
if (IS_ERR(bh)) {
ret = PTR_ERR(bh);
+ bh = NULL;
goto out;
}
@@ -2905,6 +2906,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
if (error == -EIO)
EXT4_ERROR_INODE(inode, "block %llu read error",
EXT4_I(inode)->i_file_acl);
+ bh = NULL;
goto cleanup;
}
error = ext4_xattr_check_block(inode, bh);
@@ -3061,6 +3063,7 @@ ext4_xattr_block_cache_find(struct inode *inode,
if (IS_ERR(bh)) {
if (PTR_ERR(bh) == -ENOMEM)
return NULL;
+ bh = NULL;
EXT4_ERROR_INODE(inode, "block %lu read error",
(unsigned long)ce->e_value);
} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 436b3a1464d9..5e4860b8bbfc 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -349,12 +349,14 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode,
return PTR_ERR(p);
clone = f2fs_acl_clone(p, GFP_NOFS);
- if (!clone)
- goto no_mem;
+ if (!clone) {
+ ret = -ENOMEM;
+ goto release_acl;
+ }
ret = f2fs_acl_create_masq(clone, mode);
if (ret < 0)
- goto no_mem_clone;
+ goto release_clone;
if (ret == 0)
posix_acl_release(clone);
@@ -368,11 +370,11 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode,
return 0;
-no_mem_clone:
+release_clone:
posix_acl_release(clone);
-no_mem:
+release_acl:
posix_acl_release(p);
- return -ENOMEM;
+ return ret;
}
int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index c68b319b07aa..3d37124eb63e 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1880,6 +1880,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
bool locked = false;
struct extent_info ei = {0,0,0};
int err = 0;
+ int flag;
/*
* we already allocated all the blocks, so we don't need to get
@@ -1889,9 +1890,15 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
!is_inode_flag_set(inode, FI_NO_PREALLOC))
return 0;
+ /* f2fs_lock_op avoids race between write CP and convert_inline_page */
+ if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
+ flag = F2FS_GET_BLOCK_DEFAULT;
+ else
+ flag = F2FS_GET_BLOCK_PRE_AIO;
+
if (f2fs_has_inline_data(inode) ||
(pos & PAGE_MASK) >= i_size_read(inode)) {
- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
+ __do_map_lock(sbi, flag, true);
locked = true;
}
restart:
@@ -1929,6 +1936,7 @@ restart:
f2fs_put_dnode(&dn);
__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
true);
+ WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
locked = true;
goto restart;
}
@@ -1942,7 +1950,7 @@ out:
f2fs_put_dnode(&dn);
unlock_out:
if (locked)
- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+ __do_map_lock(sbi, flag, false);
return err;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 3f1a44696036..634165fb64f1 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2294,10 +2294,19 @@ static inline bool is_dot_dotdot(const struct qstr *str)
static inline bool f2fs_may_extent_tree(struct inode *inode)
{
- if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) ||
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+ if (!test_opt(sbi, EXTENT_CACHE) ||
is_inode_flag_set(inode, FI_NO_EXTENT))
return false;
+ /*
+ * for recovered files during mount do not create extents
+ * if shrinker is not registered.
+ */
+ if (list_empty(&sbi->s_list))
+ return false;
+
return S_ISREG(inode->i_mode);
}
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 7d3189f1941c..5f549bc4e097 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -205,6 +205,9 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
trace_f2fs_sync_file_enter(inode);
+ if (S_ISDIR(inode->i_mode))
+ goto go_write;
+
/* if fdatasync is triggered, let's do in-place-update */
if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
set_inode_flag(inode, FI_NEED_IPU);
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 888a9dc13677..506e365cf903 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -656,6 +656,12 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
if (IS_ERR(ipage))
return PTR_ERR(ipage);
+ /*
+ * f2fs_readdir was protected by inode.i_rwsem, it is safe to access
+ * ipage without page's lock held.
+ */
+ unlock_page(ipage);
+
inline_dentry = inline_data_addr(inode, ipage);
make_dentry_ptr_inline(inode, &d, inline_dentry);
@@ -664,7 +670,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
if (!err)
ctx->pos = d.max;
- f2fs_put_page(ipage, 1);
+ f2fs_put_page(ipage, 0);
return err < 0 ? err : 0;
}
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 5c60fc28ec75..ec71d2e29a15 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -138,6 +138,6 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi)
f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi));
spin_lock(&f2fs_list_lock);
- list_del(&sbi->s_list);
+ list_del_init(&sbi->s_list);
spin_unlock(&f2fs_list_lock);
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index fc5c41257e68..4c169ba50c0f 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1959,7 +1959,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
unsigned int segment_count_main;
unsigned int cp_pack_start_sum, cp_payload;
block_t user_block_count;
- int i;
+ int i, j;
total = le32_to_cpu(raw_super->segment_count);
fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
@@ -2000,11 +2000,43 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs ||
le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg)
return 1;
+ for (j = i + 1; j < NR_CURSEG_NODE_TYPE; j++) {
+ if (le32_to_cpu(ckpt->cur_node_segno[i]) ==
+ le32_to_cpu(ckpt->cur_node_segno[j])) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Node segment (%u, %u) has the same "
+ "segno: %u", i, j,
+ le32_to_cpu(ckpt->cur_node_segno[i]));
+ return 1;
+ }
+ }
}
for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs ||
le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg)
return 1;
+ for (j = i + 1; j < NR_CURSEG_DATA_TYPE; j++) {
+ if (le32_to_cpu(ckpt->cur_data_segno[i]) ==
+ le32_to_cpu(ckpt->cur_data_segno[j])) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Data segment (%u, %u) has the same "
+ "segno: %u", i, j,
+ le32_to_cpu(ckpt->cur_data_segno[i]));
+ return 1;
+ }
+ }
+ }
+ for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
+ for (j = i; j < NR_CURSEG_DATA_TYPE; j++) {
+ if (le32_to_cpu(ckpt->cur_node_segno[i]) ==
+ le32_to_cpu(ckpt->cur_data_segno[j])) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Data segment (%u) and Data segment (%u)"
+ " has the same segno: %u", i, j,
+ le32_to_cpu(ckpt->cur_node_segno[i]));
+ return 1;
+ }
+ }
}
sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c
index bccbbf2616d2..8ac1851a21c0 100644
--- a/fs/f2fs/trace.c
+++ b/fs/f2fs/trace.c
@@ -61,6 +61,7 @@ void f2fs_trace_pid(struct page *page)
set_page_private(page, (unsigned long)pid);
+retry:
if (radix_tree_preload(GFP_NOFS))
return;
@@ -71,7 +72,12 @@ void f2fs_trace_pid(struct page *page)
if (p)
radix_tree_delete(&pids, pid);
- f2fs_radix_tree_insert(&pids, pid, current);
+ if (radix_tree_insert(&pids, pid, current)) {
+ spin_unlock(&pids_lock);
+ radix_tree_preload_end();
+ cond_resched();
+ goto retry;
+ }
trace_printk("%3x:%3x %4x %-16s\n",
MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
diff --git a/fs/file.c b/fs/file.c
index 4eecbf4244a5..0c25b980affe 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -462,6 +462,7 @@ struct files_struct init_files = {
.full_fds_bits = init_files.full_fds_bits_init,
},
.file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
+ .resize_wait = __WAIT_QUEUE_HEAD_INITIALIZER(init_files.resize_wait),
};
static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 3244932f4d5c..6a76616c9401 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -331,11 +331,22 @@ struct inode_switch_wbs_context {
struct work_struct work;
};
+static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
+{
+ down_write(&bdi->wb_switch_rwsem);
+}
+
+static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
+{
+ up_write(&bdi->wb_switch_rwsem);
+}
+
static void inode_switch_wbs_work_fn(struct work_struct *work)
{
struct inode_switch_wbs_context *isw =
container_of(work, struct inode_switch_wbs_context, work);
struct inode *inode = isw->inode;
+ struct backing_dev_info *bdi = inode_to_bdi(inode);
struct address_space *mapping = inode->i_mapping;
struct bdi_writeback *old_wb = inode->i_wb;
struct bdi_writeback *new_wb = isw->new_wb;
@@ -344,6 +355,12 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
void **slot;
/*
+ * If @inode switches cgwb membership while sync_inodes_sb() is
+ * being issued, sync_inodes_sb() might miss it. Synchronize.
+ */
+ down_read(&bdi->wb_switch_rwsem);
+
+ /*
* By the time control reaches here, RCU grace period has passed
* since I_WB_SWITCH assertion and all wb stat update transactions
* between unlocked_inode_to_wb_begin/end() are guaranteed to be
@@ -435,6 +452,8 @@ skip_switch:
spin_unlock(&new_wb->list_lock);
spin_unlock(&old_wb->list_lock);
+ up_read(&bdi->wb_switch_rwsem);
+
if (switched) {
wb_wakeup(new_wb);
wb_put(old_wb);
@@ -475,9 +494,18 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
if (inode->i_state & I_WB_SWITCH)
return;
+ /*
+ * Avoid starting new switches while sync_inodes_sb() is in
+ * progress. Otherwise, if the down_write protected issue path
+ * blocks heavily, we might end up starting a large number of
+ * switches which will block on the rwsem.
+ */
+ if (!down_read_trylock(&bdi->wb_switch_rwsem))
+ return;
+
isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
if (!isw)
- return;
+ goto out_unlock;
/* find and pin the new wb */
rcu_read_lock();
@@ -511,12 +539,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
* Let's continue after I_WB_SWITCH is guaranteed to be visible.
*/
call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
- return;
+ goto out_unlock;
out_free:
if (isw->new_wb)
wb_put(isw->new_wb);
kfree(isw);
+out_unlock:
+ up_read(&bdi->wb_switch_rwsem);
}
/**
@@ -894,6 +924,9 @@ fs_initcall(cgroup_writeback_init);
#else /* CONFIG_CGROUP_WRITEBACK */
+static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
+static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
+
static struct bdi_writeback *
locked_inode_to_wb_and_lock_list(struct inode *inode)
__releases(&inode->i_lock)
@@ -2408,8 +2441,11 @@ void sync_inodes_sb(struct super_block *sb)
return;
WARN_ON(!rwsem_is_locked(&sb->s_umount));
+ /* protect against inode wb switch, see inode_switch_wbs_work_fn() */
+ bdi_down_write_wb_switch_rwsem(bdi);
bdi_split_work_to_wbs(bdi, &work, false);
wb_wait_for_completion(bdi, &done);
+ bdi_up_write_wb_switch_rwsem(bdi);
wait_sb_inodes(sb);
}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index f7280c44cd4b..770733106d6d 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1691,7 +1691,6 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
req->in.h.nodeid = outarg->nodeid;
req->in.numargs = 2;
req->in.argpages = 1;
- req->page_descs[0].offset = offset;
req->end = fuse_retrieve_end;
index = outarg->offset >> PAGE_SHIFT;
@@ -1706,6 +1705,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
this_num = min_t(unsigned, num, PAGE_SIZE - offset);
req->pages[req->num_pages] = page;
+ req->page_descs[req->num_pages].offset = offset;
req->page_descs[req->num_pages].length = this_num;
req->num_pages++;
@@ -1981,10 +1981,8 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
ret = -EINVAL;
- if (rem < len) {
- pipe_unlock(pipe);
- goto out;
- }
+ if (rem < len)
+ goto out_free;
rem = len;
while (rem) {
@@ -2002,7 +2000,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
pipe->nrbufs--;
} else {
- pipe_buf_get(pipe, ibuf);
+ if (!pipe_buf_get(pipe, ibuf))
+ goto out_free;
+
*obuf = *ibuf;
obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
obuf->len = rem;
@@ -2024,10 +2024,12 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
ret = fuse_dev_do_write(fud, &cs, len);
+ pipe_lock(pipe);
+out_free:
for (idx = 0; idx < nbuf; idx++)
pipe_buf_release(pipe, &bufs[idx]);
+ pipe_unlock(pipe);
-out:
kfree(bufs);
return ret;
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 52514a64dcd6..19ea122a7d03 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1777,7 +1777,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
spin_unlock(&fc->lock);
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
- dec_node_page_state(page, NR_WRITEBACK_TEMP);
+ dec_node_page_state(new_req->pages[0], NR_WRITEBACK_TEMP);
wb_writeout_inc(&bdi->wb);
fuse_writepage_free(fc, new_req);
fuse_request_free(new_req);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 11066d8647d2..d5284d0dbdb5 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -107,7 +107,7 @@ static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode,
static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name)
{
- u32 hash = jhash2((u32 *)name, sizeof(*name) / 4, 0);
+ u32 hash = jhash2((u32 *)name, ht_parms.key_len / 4, 0);
return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS);
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 2a6ed036d207..dd28a9b287da 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -730,11 +730,17 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
umode_t mode, dev_t dev)
{
struct inode *inode;
- struct resv_map *resv_map;
+ struct resv_map *resv_map = NULL;
- resv_map = resv_map_alloc();
- if (!resv_map)
- return NULL;
+ /*
+ * Reserve maps are only needed for inodes that can have associated
+ * page allocations.
+ */
+ if (S_ISREG(mode) || S_ISLNK(mode)) {
+ resv_map = resv_map_alloc();
+ if (!resv_map)
+ return NULL;
+ }
inode = new_inode(sb);
if (inode) {
@@ -766,8 +772,10 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
break;
}
lockdep_annotate_inode_mutex_key(inode);
- } else
- kref_put(&resv_map->refs, resv_map_release);
+ } else {
+ if (resv_map)
+ kref_put(&resv_map->refs, resv_map_release);
+ }
return inode;
}
@@ -845,6 +853,18 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
rc = migrate_huge_page_move_mapping(mapping, newpage, page);
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
+
+ /*
+ * page_private is subpool pointer in hugetlb pages. Transfer to
+ * new page. PagePrivate is not associated with page_private for
+ * hugetlb pages and can not be set here as only page_huge_active
+ * pages can be migrated.
+ */
+ if (page_private(page)) {
+ set_page_private(newpage, page_private(page));
+ set_page_private(page, 0);
+ }
+
if (mode != MIGRATE_SYNC_NO_COPY)
migrate_page_copy(newpage, page);
else
diff --git a/fs/iomap.c b/fs/iomap.c
index 6a5f3d0ce87c..4966abd956d4 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -753,7 +753,8 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
err = invalidate_inode_pages2_range(inode->i_mapping,
offset >> PAGE_SHIFT,
(offset + dio->size - 1) >> PAGE_SHIFT);
- WARN_ON_ONCE(err);
+ if (err)
+ dio_warn_stale_pagecache(iocb->ki_filp);
}
inode_dio_end(file_inode(iocb->ki_filp));
@@ -1010,9 +1011,16 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (ret)
goto out_free_dio;
+ /*
+ * Try to invalidate cache pages for the range we're direct
+ * writing. If this invalidation fails, tough, the write will
+ * still work, but racing two incompatible write paths is a
+ * pretty crazy thing to do, so we don't support it 100%.
+ */
ret = invalidate_inode_pages2_range(mapping,
start >> PAGE_SHIFT, end >> PAGE_SHIFT);
- WARN_ON_ONCE(ret);
+ if (ret)
+ dio_warn_stale_pagecache(iocb->ki_filp);
ret = 0;
if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion &&
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 3c1c31321d9b..d11401afd52f 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -693,9 +693,11 @@ void jbd2_journal_commit_transaction(journal_t *journal)
the last tag we set up. */
tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
-
- jbd2_descriptor_block_csum_set(journal, descriptor);
start_journal_io:
+ if (descriptor)
+ jbd2_descriptor_block_csum_set(journal,
+ descriptor);
+
for (i = 0; i < bufs; i++) {
struct buffer_head *bh = wbuf[i];
/*
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 61d48f0c41a1..0c8f77db60e2 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1343,6 +1343,10 @@ static int journal_reset(journal_t *journal)
return jbd2_journal_start_thread(journal);
}
+/*
+ * This function expects that the caller will have locked the journal
+ * buffer head, and will return with it unlocked
+ */
static int jbd2_write_superblock(journal_t *journal, int write_flags)
{
struct buffer_head *bh = journal->j_sb_buffer;
@@ -1352,7 +1356,6 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
trace_jbd2_write_superblock(journal, write_flags);
if (!(journal->j_flags & JBD2_BARRIER))
write_flags &= ~(REQ_FUA | REQ_PREFLUSH);
- lock_buffer(bh);
if (buffer_write_io_error(bh)) {
/*
* Oh, dear. A previous attempt to write the journal
@@ -1411,6 +1414,7 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
tail_block, tail_tid);
+ lock_buffer(journal->j_sb_buffer);
sb->s_sequence = cpu_to_be32(tail_tid);
sb->s_start = cpu_to_be32(tail_block);
@@ -1441,18 +1445,17 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
journal_superblock_t *sb = journal->j_superblock;
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
- read_lock(&journal->j_state_lock);
- /* Is it already empty? */
- if (sb->s_start == 0) {
- read_unlock(&journal->j_state_lock);
+ lock_buffer(journal->j_sb_buffer);
+ if (sb->s_start == 0) { /* Is it already empty? */
+ unlock_buffer(journal->j_sb_buffer);
return;
}
+
jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
journal->j_tail_sequence);
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
sb->s_start = cpu_to_be32(0);
- read_unlock(&journal->j_state_lock);
jbd2_write_superblock(journal, write_op);
@@ -1475,9 +1478,8 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
journal_superblock_t *sb = journal->j_superblock;
int errcode;
- read_lock(&journal->j_state_lock);
+ lock_buffer(journal->j_sb_buffer);
errcode = journal->j_errno;
- read_unlock(&journal->j_state_lock);
if (errcode == -ESHUTDOWN)
errcode = 0;
jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode);
@@ -1881,28 +1883,27 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
sb = journal->j_superblock;
+ /* Load the checksum driver if necessary */
+ if ((journal->j_chksum_driver == NULL) &&
+ INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
+ journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
+ if (IS_ERR(journal->j_chksum_driver)) {
+ printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
+ journal->j_chksum_driver = NULL;
+ return 0;
+ }
+ /* Precompute checksum seed for all metadata */
+ journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
+ sizeof(sb->s_uuid));
+ }
+
+ lock_buffer(journal->j_sb_buffer);
+
/* If enabling v3 checksums, update superblock */
if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
sb->s_feature_compat &=
~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
-
- /* Load the checksum driver */
- if (journal->j_chksum_driver == NULL) {
- journal->j_chksum_driver = crypto_alloc_shash("crc32c",
- 0, 0);
- if (IS_ERR(journal->j_chksum_driver)) {
- printk(KERN_ERR "JBD2: Cannot load crc32c "
- "driver.\n");
- journal->j_chksum_driver = NULL;
- return 0;
- }
-
- /* Precompute checksum seed for all metadata */
- journal->j_csum_seed = jbd2_chksum(journal, ~0,
- sb->s_uuid,
- sizeof(sb->s_uuid));
- }
}
/* If enabling v1 checksums, downgrade superblock */
@@ -1914,6 +1915,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
sb->s_feature_compat |= cpu_to_be32(compat);
sb->s_feature_ro_compat |= cpu_to_be32(ro);
sb->s_feature_incompat |= cpu_to_be32(incompat);
+ unlock_buffer(journal->j_sb_buffer);
return 1;
#undef COMPAT_FEATURE_ON
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e42736c1fdc8..650927f0a2dc 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1224,11 +1224,12 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
struct journal_head *jh;
char *committed_data = NULL;
- JBUFFER_TRACE(jh, "entry");
if (jbd2_write_access_granted(handle, bh, true))
return 0;
jh = jbd2_journal_add_journal_head(bh);
+ JBUFFER_TRACE(jh, "entry");
+
/*
* Do this first --- it can drop the journal lock, so we want to
* make sure that obtaining the committed_data is done
@@ -1339,15 +1340,17 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
if (is_handle_aborted(handle))
return -EROFS;
- if (!buffer_jbd(bh)) {
- ret = -EUCLEAN;
- goto out;
- }
+ if (!buffer_jbd(bh))
+ return -EUCLEAN;
+
/*
* We don't grab jh reference here since the buffer must be part
* of the running transaction.
*/
jh = bh2jh(bh);
+ jbd_debug(5, "journal_head %p\n", jh);
+ JBUFFER_TRACE(jh, "entry");
+
/*
* This and the following assertions are unreliable since we may see jh
* in inconsistent state unless we grab bh_state lock. But this is
@@ -1381,9 +1384,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
}
journal = transaction->t_journal;
- jbd_debug(5, "journal_head %p\n", jh);
- JBUFFER_TRACE(jh, "entry");
-
jbd_lock_bh_state(bh);
if (jh->b_modified == 0) {
@@ -1581,14 +1581,21 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
/* However, if the buffer is still owned by a prior
* (committing) transaction, we can't drop it yet... */
JBUFFER_TRACE(jh, "belongs to older transaction");
- /* ... but we CAN drop it from the new transaction if we
- * have also modified it since the original commit. */
+ /* ... but we CAN drop it from the new transaction through
+ * marking the buffer as freed and set j_next_transaction to
+ * the new transaction, so that not only the commit code
+ * knows it should clear dirty bits when it is done with the
+ * buffer, but also the buffer can be checkpointed only
+ * after the new transaction commits. */
- if (jh->b_next_transaction) {
- J_ASSERT(jh->b_next_transaction == transaction);
+ set_buffer_freed(bh);
+
+ if (!jh->b_next_transaction) {
spin_lock(&journal->j_list_lock);
- jh->b_next_transaction = NULL;
+ jh->b_next_transaction = transaction;
spin_unlock(&journal->j_list_lock);
+ } else {
+ J_ASSERT(jh->b_next_transaction == transaction);
/*
* only drop a reference if this transaction modified
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 389ea53ea487..bccfc40b3a74 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -1414,11 +1414,6 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
jffs2_kill_fragtree(&f->fragtree, deleted?c:NULL);
- if (f->target) {
- kfree(f->target);
- f->target = NULL;
- }
-
fds = f->dents;
while(fds) {
fd = fds;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 83340496645b..9a9f30eddbbb 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -47,7 +47,10 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb)
static void jffs2_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
- kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
+ struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
+
+ kfree(f->target);
+ kmem_cache_free(jffs2_inode_cachep, f);
}
static void jffs2_destroy_inode(struct inode *inode)
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 95a7c88baed9..5019058e0f6a 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -196,8 +196,10 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
return dentry;
knparent = find_next_ancestor(kn, NULL);
- if (WARN_ON(!knparent))
+ if (WARN_ON(!knparent)) {
+ dput(dentry);
return ERR_PTR(-EINVAL);
+ }
do {
struct dentry *dtmp;
@@ -206,8 +208,10 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
if (kn == knparent)
return dentry;
kntmp = find_next_ancestor(kn, knparent);
- if (WARN_ON(!kntmp))
+ if (WARN_ON(!kntmp)) {
+ dput(dentry);
return ERR_PTR(-EINVAL);
+ }
dtmp = lookup_one_len_unlocked(kntmp->name, dentry,
strlen(kntmp->name));
dput(dentry);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 7d6ddfd60271..a98d64a6eda5 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -459,7 +459,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
case XPRT_TRANSPORT_RDMA:
if (retrans == NFS_UNSPEC_RETRANS)
to->to_retries = NFS_DEF_TCP_RETRANS;
- if (timeo == NFS_UNSPEC_TIMEO || to->to_retries == 0)
+ if (timeo == NFS_UNSPEC_TIMEO || to->to_initval == 0)
to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10;
if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
to->to_initval = NFS_MAX_TCP_TIMEOUT;
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index b6f9d84ba19b..ae2d6f220627 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -44,6 +44,7 @@
#include <linux/keyctl.h>
#include <linux/key-type.h>
#include <keys/user-type.h>
+#include <keys/request_key_auth-type.h>
#include <linux/module.h>
#include "internal.h"
@@ -59,7 +60,7 @@ static struct key_type key_type_id_resolver_legacy;
struct idmap_legacy_upcalldata {
struct rpc_pipe_msg pipe_msg;
struct idmap_msg idmap_msg;
- struct key_construction *key_cons;
+ struct key *authkey;
struct idmap *idmap;
};
@@ -384,7 +385,7 @@ static const match_table_t nfs_idmap_tokens = {
{ Opt_find_err, NULL }
};
-static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *);
+static int nfs_idmap_legacy_upcall(struct key *, void *);
static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
size_t);
static void idmap_release_pipe(struct inode *);
@@ -545,11 +546,12 @@ nfs_idmap_prepare_pipe_upcall(struct idmap *idmap,
static void
nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret)
{
- struct key_construction *cons = idmap->idmap_upcall_data->key_cons;
+ struct key *authkey = idmap->idmap_upcall_data->authkey;
kfree(idmap->idmap_upcall_data);
idmap->idmap_upcall_data = NULL;
- complete_request_key(cons, ret);
+ complete_request_key(authkey, ret);
+ key_put(authkey);
}
static void
@@ -559,15 +561,14 @@ nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret)
nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
}
-static int nfs_idmap_legacy_upcall(struct key_construction *cons,
- const char *op,
- void *aux)
+static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux)
{
struct idmap_legacy_upcalldata *data;
+ struct request_key_auth *rka = get_request_key_auth(authkey);
struct rpc_pipe_msg *msg;
struct idmap_msg *im;
struct idmap *idmap = (struct idmap *)aux;
- struct key *key = cons->key;
+ struct key *key = rka->target_key;
int ret = -ENOKEY;
if (!aux)
@@ -582,7 +583,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
msg = &data->pipe_msg;
im = &data->idmap_msg;
data->idmap = idmap;
- data->key_cons = cons;
+ data->authkey = key_get(authkey);
ret = nfs_idmap_prepare_message(key->description, idmap, im, msg);
if (ret < 0)
@@ -600,7 +601,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
out2:
kfree(data);
out1:
- complete_request_key(cons, ret);
+ complete_request_key(authkey, ret);
return ret;
}
@@ -647,9 +648,10 @@ out:
static ssize_t
idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
{
+ struct request_key_auth *rka;
struct rpc_inode *rpci = RPC_I(file_inode(filp));
struct idmap *idmap = (struct idmap *)rpci->private;
- struct key_construction *cons;
+ struct key *authkey;
struct idmap_msg im;
size_t namelen_in;
int ret = -ENOKEY;
@@ -661,7 +663,8 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (idmap->idmap_upcall_data == NULL)
goto out_noupcall;
- cons = idmap->idmap_upcall_data->key_cons;
+ authkey = idmap->idmap_upcall_data->authkey;
+ rka = get_request_key_auth(authkey);
if (mlen != sizeof(im)) {
ret = -ENOSPC;
@@ -686,9 +689,9 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
ret = nfs_idmap_read_and_verify_message(&im,
&idmap->idmap_upcall_data->idmap_msg,
- cons->key, cons->authkey);
+ rka->target_key, authkey);
if (ret >= 0) {
- key_set_timeout(cons->key, nfs_idmap_cache_timeout);
+ key_set_timeout(rka->target_key, nfs_idmap_cache_timeout);
ret = mlen;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a3b67d3b1dfb..a225f98c9903 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -914,6 +914,13 @@ nfs4_sequence_process_interrupted(struct nfs_client *client,
#endif /* !CONFIG_NFS_V4_1 */
+static void nfs41_sequence_res_init(struct nfs4_sequence_res *res)
+{
+ res->sr_timestamp = jiffies;
+ res->sr_status_flags = 0;
+ res->sr_status = 1;
+}
+
static
void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res,
@@ -925,10 +932,6 @@ void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args,
args->sa_slot = slot;
res->sr_slot = slot;
- res->sr_timestamp = jiffies;
- res->sr_status_flags = 0;
- res->sr_status = 1;
-
}
int nfs4_setup_sequence(struct nfs_client *client,
@@ -974,6 +977,7 @@ int nfs4_setup_sequence(struct nfs_client *client,
trace_nfs4_setup_sequence(session, args);
out_start:
+ nfs41_sequence_res_init(res);
rpc_call_start(task);
return 0;
@@ -2742,7 +2746,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
nfs4_schedule_stateid_recovery(server, state);
}
out:
- nfs4_sequence_free_slot(&opendata->o_res.seq_res);
+ if (!opendata->cancelled)
+ nfs4_sequence_free_slot(&opendata->o_res.seq_res);
return ret;
}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 37f20d7a26ed..28b013d1d44a 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -988,6 +988,17 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
}
}
+static void
+nfs_pageio_cleanup_request(struct nfs_pageio_descriptor *desc,
+ struct nfs_page *req)
+{
+ LIST_HEAD(head);
+
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, &head);
+ desc->pg_completion_ops->error_cleanup(&head);
+}
+
/**
* nfs_pageio_add_request - Attempt to coalesce a request into a page list.
* @desc: destination io descriptor
@@ -1025,10 +1036,8 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
nfs_page_group_unlock(req);
desc->pg_moreio = 1;
nfs_pageio_doio(desc);
- if (desc->pg_error < 0)
- return 0;
- if (mirror->pg_recoalesce)
- return 0;
+ if (desc->pg_error < 0 || mirror->pg_recoalesce)
+ goto out_cleanup_subreq;
/* retry add_request for this subreq */
nfs_page_group_lock(req);
continue;
@@ -1061,6 +1070,10 @@ err_ptr:
desc->pg_error = PTR_ERR(subreq);
nfs_page_group_unlock(req);
return 0;
+out_cleanup_subreq:
+ if (req != subreq)
+ nfs_pageio_cleanup_request(desc, subreq);
+ return 0;
}
static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
@@ -1079,7 +1092,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
struct nfs_page *req;
req = list_first_entry(&head, struct nfs_page, wb_list);
- nfs_list_remove_request(req);
if (__nfs_pageio_add_request(desc, req))
continue;
if (desc->pg_error < 0) {
@@ -1168,11 +1180,14 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
if (nfs_pgio_has_mirroring(desc))
desc->pg_mirror_idx = midx;
if (!nfs_pageio_add_request_mirror(desc, dupreq))
- goto out_failed;
+ goto out_cleanup_subreq;
}
return 1;
+out_cleanup_subreq:
+ if (req != dupreq)
+ nfs_pageio_cleanup_request(desc, dupreq);
out_failed:
/* remember fatal errors */
if (nfs_error_is_fatal(desc->pg_error))
@@ -1198,7 +1213,7 @@ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc,
desc->pg_mirror_idx = mirror_idx;
for (;;) {
nfs_pageio_doio(desc);
- if (!mirror->pg_recoalesce)
+ if (desc->pg_error < 0 || !mirror->pg_recoalesce)
break;
if (!nfs_do_recoalesce(desc))
break;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 38de09b08e96..f464f8d9060c 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1901,6 +1901,11 @@ static int nfs_parse_devname(const char *dev_name,
size_t len;
char *end;
+ if (unlikely(!dev_name || !*dev_name)) {
+ dfprintk(MOUNT, "NFS: device name not specified\n");
+ return -EINVAL;
+ }
+
/* Is the host name protected with square brakcets? */
if (*dev_name == '[') {
end = strchr(++dev_name, ']');
@@ -2039,7 +2044,8 @@ static int nfs23_validate_mount_data(void *options,
memcpy(sap, &data->addr, sizeof(data->addr));
args->nfs_server.addrlen = sizeof(data->addr);
args->nfs_server.port = ntohs(data->addr.sin_port);
- if (!nfs_verify_server_address(sap))
+ if (sap->sa_family != AF_INET ||
+ !nfs_verify_server_address(sap))
goto out_no_address;
if (!(data->flags & NFS_MOUNT_TCP))
@@ -2401,8 +2407,7 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n
goto Ebusy;
if (a->acdirmax != b->acdirmax)
goto Ebusy;
- if (b->auth_info.flavor_len > 0 &&
- clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
+ if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
goto Ebusy;
return 1;
Ebusy:
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2d956a7d5378..50ed3944d183 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -236,9 +236,9 @@ out:
}
/* A writeback failed: mark the page as bad, and invalidate the page cache */
-static void nfs_set_pageerror(struct page *page)
+static void nfs_set_pageerror(struct address_space *mapping)
{
- nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page));
+ nfs_zap_mapping(mapping->host, mapping);
}
/*
@@ -994,7 +994,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
nfs_list_remove_request(req);
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
(hdr->good_bytes < bytes)) {
- nfs_set_pageerror(req->wb_page);
+ nfs_set_pageerror(page_file_mapping(req->wb_page));
nfs_context_set_write_error(req->wb_context, hdr->error);
goto remove_req;
}
@@ -1330,7 +1330,8 @@ int nfs_updatepage(struct file *file, struct page *page,
unsigned int offset, unsigned int count)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
- struct inode *inode = page_file_mapping(page)->host;
+ struct address_space *mapping = page_file_mapping(page);
+ struct inode *inode = mapping->host;
int status = 0;
nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
@@ -1348,7 +1349,7 @@ int nfs_updatepage(struct file *file, struct page *page,
status = nfs_writepage_setup(ctx, page, offset, count);
if (status < 0)
- nfs_set_pageerror(page);
+ nfs_set_pageerror(mapping);
else
__set_page_dirty_nobuffers(page);
out:
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 1d0ce3c57d93..c0de4d6cd857 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -446,8 +446,19 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp)
&resp->common, nfs3svc_encode_entry);
memcpy(resp->verf, argp->verf, 8);
resp->count = resp->buffer - argp->buffer;
- if (resp->offset)
- xdr_encode_hyper(resp->offset, argp->cookie);
+ if (resp->offset) {
+ loff_t offset = argp->cookie;
+
+ if (unlikely(resp->offset1)) {
+ /* we ended up with offset on a page boundary */
+ *resp->offset = htonl(offset >> 32);
+ *resp->offset1 = htonl(offset & 0xffffffff);
+ resp->offset1 = NULL;
+ } else {
+ xdr_encode_hyper(resp->offset, offset);
+ }
+ resp->offset = NULL;
+ }
RETURN_STATUS(nfserr);
}
@@ -516,6 +527,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp)
} else {
xdr_encode_hyper(resp->offset, offset);
}
+ resp->offset = NULL;
}
RETURN_STATUS(nfserr);
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index f38acd905441..ef3e7878456c 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -922,6 +922,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
} else {
xdr_encode_hyper(cd->offset, offset64);
}
+ cd->offset = NULL;
}
/*
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 49b0a9e7ff18..80aeb19b176b 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -939,8 +939,9 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
cb->cb_seq_status = 1;
cb->cb_status = 0;
if (minorversion) {
- if (!nfsd41_cb_get_slot(clp, task))
+ if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task))
return;
+ cb->cb_holds_slot = true;
}
rpc_call_start(task);
}
@@ -967,6 +968,9 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
return true;
}
+ if (!cb->cb_holds_slot)
+ goto need_restart;
+
switch (cb->cb_seq_status) {
case 0:
/*
@@ -1004,6 +1008,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
cb->cb_seq_status);
}
+ cb->cb_holds_slot = false;
clear_bit(0, &clp->cl_cb_slot_busy);
rpc_wake_up_next(&clp->cl_cb_waitq);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
@@ -1211,6 +1216,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
cb->cb_seq_status = 1;
cb->cb_status = 0;
cb->cb_need_restart = false;
+ cb->cb_holds_slot = false;
}
void nfsd4_run_cb(struct nfsd4_callback *cb)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3cef6bfa09d4..94128643ec1a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1472,8 +1472,10 @@ free_session_slots(struct nfsd4_session *ses)
{
int i;
- for (i = 0; i < ses->se_fchannel.maxreqs; i++)
+ for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+ free_svc_cred(&ses->se_slots[i]->sl_cred);
kfree(ses->se_slots[i]);
+ }
}
/*
@@ -2331,14 +2333,18 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
dprintk("--> %s slot %p\n", __func__, slot);
+ slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
slot->sl_opcnt = resp->opcnt;
slot->sl_status = resp->cstate.status;
+ free_svc_cred(&slot->sl_cred);
+ copy_cred(&slot->sl_cred, &resp->rqstp->rq_cred);
- slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
- if (nfsd4_not_cached(resp)) {
- slot->sl_datalen = 0;
+ if (!nfsd4_cache_this(resp)) {
+ slot->sl_flags &= ~NFSD4_SLOT_CACHED;
return;
}
+ slot->sl_flags |= NFSD4_SLOT_CACHED;
+
base = resp->cstate.data_offset;
slot->sl_datalen = buf->len - base;
if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen))
@@ -2365,8 +2371,16 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
op = &args->ops[resp->opcnt - 1];
nfsd4_encode_operation(resp, op);
- /* Return nfserr_retry_uncached_rep in next operation. */
- if (args->opcnt > 1 && !(slot->sl_flags & NFSD4_SLOT_CACHETHIS)) {
+ if (slot->sl_flags & NFSD4_SLOT_CACHED)
+ return op->status;
+ if (args->opcnt == 1) {
+ /*
+ * The original operation wasn't a solo sequence--we
+ * always cache those--so this retry must not match the
+ * original:
+ */
+ op->status = nfserr_seq_false_retry;
+ } else {
op = &args->ops[resp->opcnt++];
op->status = nfserr_retry_uncached_rep;
nfsd4_encode_operation(resp, op);
@@ -3030,6 +3044,34 @@ static bool nfsd4_request_too_big(struct svc_rqst *rqstp,
return xb->len > session->se_fchannel.maxreq_sz;
}
+static bool replay_matches_cache(struct svc_rqst *rqstp,
+ struct nfsd4_sequence *seq, struct nfsd4_slot *slot)
+{
+ struct nfsd4_compoundargs *argp = rqstp->rq_argp;
+
+ if ((bool)(slot->sl_flags & NFSD4_SLOT_CACHETHIS) !=
+ (bool)seq->cachethis)
+ return false;
+ /*
+ * If there's an error than the reply can have fewer ops than
+ * the call. But if we cached a reply with *more* ops than the
+ * call you're sending us now, then this new call is clearly not
+ * really a replay of the old one:
+ */
+ if (slot->sl_opcnt < argp->opcnt)
+ return false;
+ /* This is the only check explicitly called by spec: */
+ if (!same_creds(&rqstp->rq_cred, &slot->sl_cred))
+ return false;
+ /*
+ * There may be more comparisons we could actually do, but the
+ * spec doesn't require us to catch every case where the calls
+ * don't match (that would require caching the call as well as
+ * the reply), so we don't bother.
+ */
+ return true;
+}
+
__be32
nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
@@ -3089,6 +3131,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfserr_seq_misordered;
if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
goto out_put_session;
+ status = nfserr_seq_false_retry;
+ if (!replay_matches_cache(rqstp, seq, slot))
+ goto out_put_session;
cstate->slot = slot;
cstate->session = session;
cstate->clp = clp;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 6493df6b1bd5..d44402241d9e 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1126,6 +1126,8 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
case 'Y':
case 'y':
case '1':
+ if (!nn->nfsd_serv)
+ return -EBUSY;
nfsd4_end_grace(nn);
break;
default:
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 005c911b34ac..133d8bf62a5c 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -69,6 +69,7 @@ struct nfsd4_callback {
int cb_seq_status;
int cb_status;
bool cb_need_restart;
+ bool cb_holds_slot;
};
struct nfsd4_callback_ops {
@@ -169,11 +170,13 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
struct nfsd4_slot {
u32 sl_seqid;
__be32 sl_status;
+ struct svc_cred sl_cred;
u32 sl_datalen;
u16 sl_opcnt;
#define NFSD4_SLOT_INUSE (1 << 0)
#define NFSD4_SLOT_CACHETHIS (1 << 1)
#define NFSD4_SLOT_INITIALIZED (1 << 2)
+#define NFSD4_SLOT_CACHED (1 << 3)
u8 sl_flags;
char sl_data[];
};
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index aa4375eac475..f47c392cbd57 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -651,9 +651,18 @@ static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
}
-static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
+/*
+ * The session reply cache only needs to cache replies that the client
+ * actually asked us to. But it's almost free for us to cache compounds
+ * consisting of only a SEQUENCE op, so we may as well cache those too.
+ * Also, the protocol doesn't give us a convenient response in the case
+ * of a replay of a solo SEQUENCE op that wasn't cached
+ * (RETRY_UNCACHED_REP can only be returned in the second op of a
+ * compound).
+ */
+static inline bool nfsd4_cache_this(struct nfsd4_compoundres *resp)
{
- return !(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)
+ return (resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)
|| nfsd4_is_solo_sequence(resp);
}
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 99ee093182cb..cc9b32b9db7c 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-ccflags-y := -Ifs/ocfs2
+ccflags-y := -I$(src)
obj-$(CONFIG_OCFS2_FS) += \
ocfs2.o \
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 1d098c3c00e0..9f8250df99f1 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -152,7 +152,6 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
#endif
}
- clear_buffer_uptodate(bh);
get_bh(bh); /* for end_buffer_read_sync() */
bh->b_end_io = end_buffer_read_sync;
submit_bh(REQ_OP_READ, 0, bh);
@@ -306,7 +305,6 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
continue;
}
- clear_buffer_uptodate(bh);
get_bh(bh); /* for end_buffer_read_sync() */
if (validate)
set_buffer_needs_validate(bh);
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index c204ac9b49e5..81a0d5d82757 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -621,13 +621,15 @@ static void o2nm_node_group_drop_item(struct config_group *group,
struct o2nm_node *node = to_o2nm_node(item);
struct o2nm_cluster *cluster = to_o2nm_cluster(group->cg_item.ci_parent);
- o2net_disconnect_node(node);
+ if (cluster->cl_nodes[node->nd_num] == node) {
+ o2net_disconnect_node(node);
- if (cluster->cl_has_local &&
- (cluster->cl_local_node == node->nd_num)) {
- cluster->cl_has_local = 0;
- cluster->cl_local_node = O2NM_INVALID_NODE_NUM;
- o2net_stop_listening(node);
+ if (cluster->cl_has_local &&
+ (cluster->cl_local_node == node->nd_num)) {
+ cluster->cl_has_local = 0;
+ cluster->cl_local_node = O2NM_INVALID_NODE_NUM;
+ o2net_stop_listening(node);
+ }
}
/* XXX call into net to stop this node from trading messages */
diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile
index bd1aab1f49a4..ef2854422a6e 100644
--- a/fs/ocfs2/dlm/Makefile
+++ b/fs/ocfs2/dlm/Makefile
@@ -1,4 +1,4 @@
-ccflags-y := -Ifs/ocfs2
+ccflags-y := -I$(src)/..
obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o
diff --git a/fs/ocfs2/dlmfs/Makefile b/fs/ocfs2/dlmfs/Makefile
index eed3db8c5b49..33431a0296a3 100644
--- a/fs/ocfs2/dlmfs/Makefile
+++ b/fs/ocfs2/dlmfs/Makefile
@@ -1,4 +1,4 @@
-ccflags-y := -Ifs/ocfs2
+ccflags-y := -I$(src)/..
obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 824f407df1db..3a4e1bca5e31 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4716,22 +4716,23 @@ out:
/* Lock an inode and grab a bh pointing to the inode. */
static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
- struct buffer_head **bh1,
+ struct buffer_head **bh_s,
struct inode *t_inode,
- struct buffer_head **bh2)
+ struct buffer_head **bh_t)
{
- struct inode *inode1;
- struct inode *inode2;
+ struct inode *inode1 = s_inode;
+ struct inode *inode2 = t_inode;
struct ocfs2_inode_info *oi1;
struct ocfs2_inode_info *oi2;
+ struct buffer_head *bh1 = NULL;
+ struct buffer_head *bh2 = NULL;
bool same_inode = (s_inode == t_inode);
+ bool need_swap = (inode1->i_ino > inode2->i_ino);
int status;
/* First grab the VFS and rw locks. */
lock_two_nondirectories(s_inode, t_inode);
- inode1 = s_inode;
- inode2 = t_inode;
- if (inode1->i_ino > inode2->i_ino)
+ if (need_swap)
swap(inode1, inode2);
status = ocfs2_rw_lock(inode1, 1);
@@ -4754,17 +4755,13 @@ static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno,
(unsigned long long)oi2->ip_blkno);
- if (*bh1)
- *bh1 = NULL;
- if (*bh2)
- *bh2 = NULL;
-
/* We always want to lock the one with the lower lockid first. */
if (oi1->ip_blkno > oi2->ip_blkno)
mlog_errno(-ENOLCK);
/* lock id1 */
- status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_REFLINK_TARGET);
+ status = ocfs2_inode_lock_nested(inode1, &bh1, 1,
+ OI_LS_REFLINK_TARGET);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
@@ -4773,15 +4770,25 @@ static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
/* lock id2 */
if (!same_inode) {
- status = ocfs2_inode_lock_nested(inode2, bh2, 1,
+ status = ocfs2_inode_lock_nested(inode2, &bh2, 1,
OI_LS_REFLINK_TARGET);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
goto out_cl1;
}
- } else
- *bh2 = *bh1;
+ } else {
+ bh2 = bh1;
+ }
+
+ /*
+ * If we swapped inode order above, we have to swap the buffer heads
+ * before passing them back to the caller.
+ */
+ if (need_swap)
+ swap(bh1, bh2);
+ *bh_s = bh1;
+ *bh_t = bh2;
trace_ocfs2_double_lock_end(
(unsigned long long)OCFS2_I(inode1)->ip_blkno,
@@ -4791,8 +4798,7 @@ static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
out_cl1:
ocfs2_inode_unlock(inode1, 1);
- brelse(*bh1);
- *bh1 = NULL;
+ brelse(bh1);
out_rw2:
ocfs2_rw_unlock(inode2, 1);
out_i2:
diff --git a/fs/open.c b/fs/open.c
index 7ea118471dce..28a3956c4479 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -716,6 +716,12 @@ static int do_dentry_open(struct file *f,
return 0;
}
+ /* Any file opened for execve()/uselib() has to be a regular file. */
+ if (unlikely(f->f_flags & FMODE_EXEC && !S_ISREG(inode->i_mode))) {
+ error = -EACCES;
+ goto cleanup_file;
+ }
+
if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
error = get_write_access(inode);
if (unlikely(error))
diff --git a/fs/pipe.c b/fs/pipe.c
index 8ef7d7bef775..fa3c2c25cec5 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -194,9 +194,9 @@ EXPORT_SYMBOL(generic_pipe_buf_steal);
* in the tee() system call, when we duplicate the buffers in one
* pipe into another.
*/
-void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
+bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
- get_page(buf->page);
+ return try_get_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_get);
@@ -239,6 +239,14 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
.get = generic_pipe_buf_get,
};
+static const struct pipe_buf_operations anon_pipe_buf_nomerge_ops = {
+ .can_merge = 0,
+ .confirm = generic_pipe_buf_confirm,
+ .release = anon_pipe_buf_release,
+ .steal = anon_pipe_buf_steal,
+ .get = generic_pipe_buf_get,
+};
+
static const struct pipe_buf_operations packet_pipe_buf_ops = {
.can_merge = 0,
.confirm = generic_pipe_buf_confirm,
@@ -247,6 +255,12 @@ static const struct pipe_buf_operations packet_pipe_buf_ops = {
.get = generic_pipe_buf_get,
};
+void pipe_buf_mark_unmergeable(struct pipe_buffer *buf)
+{
+ if (buf->ops == &anon_pipe_buf_ops)
+ buf->ops = &anon_pipe_buf_nomerge_ops;
+}
+
static ssize_t
pipe_read(struct kiocb *iocb, struct iov_iter *to)
{
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9063738ff1f0..64695dcf89f3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1108,10 +1108,6 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
task_lock(p);
if (!p->vfork_done && process_shares_mm(p, mm)) {
- pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). Report if this is unexpected.\n",
- task_pid_nr(p), p->comm,
- p->signal->oom_score_adj, oom_adj,
- task_pid_nr(task), task->comm);
p->signal->oom_score_adj = oom_adj;
if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
p->signal->oom_score_adj_min = (short)oom_adj;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index f69c545f5868..555698ddb943 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1620,8 +1620,11 @@ static void drop_sysctl_table(struct ctl_table_header *header)
if (--header->nreg)
return;
- put_links(header);
- start_unregistering(header);
+ if (parent) {
+ put_links(header);
+ start_unregistering(header);
+ }
+
if (!--header->count)
kfree_rcu(header, rcu);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2b47757c9c68..309d24118f9a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -459,7 +459,7 @@ struct mem_size_stats {
};
static void smaps_account(struct mem_size_stats *mss, struct page *page,
- bool compound, bool young, bool dirty)
+ bool compound, bool young, bool dirty, bool locked)
{
int i, nr = compound ? 1 << compound_order(page) : 1;
unsigned long size = nr * PAGE_SIZE;
@@ -486,24 +486,31 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
else
mss->private_clean += size;
mss->pss += (u64)size << PSS_SHIFT;
+ if (locked)
+ mss->pss_locked += (u64)size << PSS_SHIFT;
return;
}
for (i = 0; i < nr; i++, page++) {
int mapcount = page_mapcount(page);
+ unsigned long pss = (PAGE_SIZE << PSS_SHIFT);
if (mapcount >= 2) {
if (dirty || PageDirty(page))
mss->shared_dirty += PAGE_SIZE;
else
mss->shared_clean += PAGE_SIZE;
- mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
+ mss->pss += pss / mapcount;
+ if (locked)
+ mss->pss_locked += pss / mapcount;
} else {
if (dirty || PageDirty(page))
mss->private_dirty += PAGE_SIZE;
else
mss->private_clean += PAGE_SIZE;
- mss->pss += PAGE_SIZE << PSS_SHIFT;
+ mss->pss += pss;
+ if (locked)
+ mss->pss_locked += pss;
}
}
}
@@ -526,6 +533,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
{
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = walk->vma;
+ bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
if (pte_present(*pte)) {
@@ -568,7 +576,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
if (!page)
return;
- smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte));
+ smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -577,6 +585,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
{
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = walk->vma;
+ bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page;
/* FOLL_DUMP will return -EFAULT on huge zero page */
@@ -591,7 +600,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
/* pass */;
else
VM_BUG_ON_PAGE(1, page);
- smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd));
+ smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
}
#else
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
@@ -792,11 +801,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
}
}
#endif
-
/* mmap_sem is held in m_start */
walk_page_vma(vma, &smaps_walk);
- if (vma->vm_flags & VM_LOCKED)
- mss->pss_locked += mss->pss;
if (!rollup_mode) {
show_map_vma(m, vma, is_pid);
@@ -1154,6 +1160,24 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
count = -EINTR;
goto out_mm;
}
+ /*
+ * Avoid to modify vma->vm_flags
+ * without locked ops while the
+ * coredump reads the vm_flags.
+ */
+ if (!mmget_still_valid(mm)) {
+ /*
+ * Silently return "count"
+ * like if get_task_mm()
+ * failed. FIXME: should this
+ * function have returned
+ * -ESRCH if get_task_mm()
+ * failed like if
+ * get_proc_task() fails?
+ */
+ up_write(&mm->mmap_sem);
+ goto out_mm;
+ }
for (vma = mm->mmap; vma; vma = vma->vm_next) {
vma->vm_flags &= ~VM_SOFTDIRTY;
vma_set_page_prot(vma);
diff --git a/fs/read_write.c b/fs/read_write.c
index 57a00ef895b2..1c3eada2fe25 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1235,6 +1235,9 @@ COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
const struct compat_iovec __user *,vec,
unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
+ if (pos == -1)
+ return do_compat_readv(fd, vec, vlen, flags);
+
return do_compat_preadv64(fd, vec, vlen, pos, flags);
}
#endif
@@ -1341,6 +1344,9 @@ COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
const struct compat_iovec __user *,vec,
unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
+ if (pos == -1)
+ return do_compat_writev(fd, vec, vlen, flags);
+
return do_compat_pwritev64(fd, vec, vlen, pos, flags);
}
#endif
diff --git a/fs/splice.c b/fs/splice.c
index f3084cce0ea6..c84ac7e97e21 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -332,8 +332,8 @@ const struct pipe_buf_operations default_pipe_buf_ops = {
.get = generic_pipe_buf_get,
};
-static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
+int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
{
return 1;
}
@@ -1571,7 +1571,11 @@ retry:
* Get a reference to this pipe buffer,
* so we can copy the contents over.
*/
- pipe_buf_get(ipipe, ibuf);
+ if (!pipe_buf_get(ipipe, ibuf)) {
+ if (ret == 0)
+ ret = -EFAULT;
+ break;
+ }
*obuf = *ibuf;
/*
@@ -1580,6 +1584,8 @@ retry:
*/
obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
+ pipe_buf_mark_unmergeable(obuf);
+
obuf->len = len;
opipe->nrbufs++;
ibuf->offset += obuf->len;
@@ -1643,7 +1649,11 @@ static int link_pipe(struct pipe_inode_info *ipipe,
* Get a reference to this pipe buffer,
* so we can copy the contents over.
*/
- pipe_buf_get(ipipe, ibuf);
+ if (!pipe_buf_get(ipipe, ibuf)) {
+ if (ret == 0)
+ ret = -EFAULT;
+ break;
+ }
obuf = opipe->bufs + nbuf;
*obuf = *ibuf;
@@ -1654,6 +1664,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
*/
obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
+ pipe_buf_mark_unmergeable(obuf);
+
if (obuf->len > len)
obuf->len = len;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 8dacf4f57414..28b9d7cca29b 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1357,6 +1357,12 @@ reread:
iinfo->i_alloc_type = le16_to_cpu(fe->icbTag.flags) &
ICBTAG_FLAG_AD_MASK;
+ if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_SHORT &&
+ iinfo->i_alloc_type != ICBTAG_FLAG_AD_LONG &&
+ iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
+ ret = -EIO;
+ goto out;
+ }
iinfo->i_unique = 0;
iinfo->i_lenEAttr = 0;
iinfo->i_lenExtents = 0;
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 42b8c57795cb..c6ce7503a329 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -260,6 +260,9 @@ void udf_truncate_extents(struct inode *inode)
epos.block = eloc;
epos.bh = udf_tread(sb,
udf_get_lb_pblock(sb, &eloc, 0));
+ /* Error reading indirect block? */
+ if (!epos.bh)
+ return;
if (elen)
indirect_ext_len =
(elen + sb->s_blocksize - 1) >>
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 5f10052d2671..7a908d683258 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -627,6 +627,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
/* the various vma->vm_userfaultfd_ctx still points to it */
down_write(&mm->mmap_sem);
+ /* no task can run (and in turn coredump) yet */
+ VM_WARN_ON(!mmget_still_valid(mm));
for (vma = mm->mmap; vma; vma = vma->vm_next)
if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
@@ -867,6 +869,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
* taking the mmap_sem for writing.
*/
down_write(&mm->mmap_sem);
+ if (!mmget_still_valid(mm))
+ goto skip_mm;
prev = NULL;
for (vma = mm->mmap; vma; vma = vma->vm_next) {
cond_resched();
@@ -889,6 +893,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
vma->vm_flags = new_flags;
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
}
+skip_mm:
up_write(&mm->mmap_sem);
mmput(mm);
wakeup:
@@ -1327,6 +1332,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
goto out;
down_write(&mm->mmap_sem);
+ if (!mmget_still_valid(mm))
+ goto out_unlock;
vma = find_vma_prev(mm, start, &prev);
if (!vma)
goto out_unlock;
@@ -1514,6 +1521,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
goto out;
down_write(&mm->mmap_sem);
+ if (!mmget_still_valid(mm))
+ goto out_unlock;
vma = find_vma_prev(mm, start, &prev);
if (!vma)
goto out_unlock;
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index ea66f04f46f7..e4265db08e4b 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -212,6 +212,7 @@ xfs_attr_set(
int flags)
{
struct xfs_mount *mp = dp->i_mount;
+ struct xfs_buf *leaf_bp = NULL;
struct xfs_da_args args;
struct xfs_defer_ops dfops;
struct xfs_trans_res tres;
@@ -327,9 +328,16 @@ xfs_attr_set(
* GROT: another possible req'mt for a double-split btree op.
*/
xfs_defer_init(args.dfops, args.firstblock);
- error = xfs_attr_shortform_to_leaf(&args);
+ error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
if (error)
goto out_defer_cancel;
+ /*
+ * Prevent the leaf buffer from being unlocked so that a
+ * concurrent AIL push cannot grab the half-baked leaf
+ * buffer and run into problems with the write verifier.
+ */
+ xfs_trans_bhold(args.trans, leaf_bp);
+ xfs_defer_bjoin(args.dfops, leaf_bp);
xfs_defer_ijoin(args.dfops, dp);
error = xfs_defer_finish(&args.trans, args.dfops);
if (error)
@@ -337,13 +345,14 @@ xfs_attr_set(
/*
* Commit the leaf transformation. We'll need another (linked)
- * transaction to add the new attribute to the leaf.
+ * transaction to add the new attribute to the leaf, which
+ * means that we have to hold & join the leaf buffer here too.
*/
-
error = xfs_trans_roll_inode(&args.trans, dp);
if (error)
goto out;
-
+ xfs_trans_bjoin(args.trans, leaf_bp);
+ leaf_bp = NULL;
}
if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
@@ -374,8 +383,9 @@ xfs_attr_set(
out_defer_cancel:
xfs_defer_cancel(&dfops);
- args.trans = NULL;
out:
+ if (leaf_bp)
+ xfs_trans_brelse(args.trans, leaf_bp);
if (args.trans)
xfs_trans_cancel(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 40e53a4fc0a6..73a541755d5b 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -739,10 +739,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
}
/*
- * Convert from using the shortform to the leaf.
+ * Convert from using the shortform to the leaf. On success, return the
+ * buffer so that we can keep it locked until we're totally done with it.
*/
int
-xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
+xfs_attr_shortform_to_leaf(
+ struct xfs_da_args *args,
+ struct xfs_buf **leaf_bp)
{
xfs_inode_t *dp;
xfs_attr_shortform_t *sf;
@@ -821,7 +824,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
}
error = 0;
-
+ *leaf_bp = bp;
out:
kmem_free(tmpbuffer);
return error;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index f7dda0c237b0..894124efb421 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -48,7 +48,8 @@ void xfs_attr_shortform_create(struct xfs_da_args *args);
void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
int xfs_attr_shortform_lookup(struct xfs_da_args *args);
int xfs_attr_shortform_getvalue(struct xfs_da_args *args);
-int xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
+int xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
+ struct xfs_buf **leaf_bp);
int xfs_attr_shortform_remove(struct xfs_da_args *args);
int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 072ebfe1d6ae..087fea02c389 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -249,6 +249,10 @@ xfs_defer_trans_roll(
for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
+ /* Hold the (previously bjoin'd) buffer locked across the roll. */
+ for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++)
+ xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]);
+
trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
/* Roll the transaction. */
@@ -264,6 +268,12 @@ xfs_defer_trans_roll(
for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
+ /* Rejoin the buffers and dirty them so the log moves forward. */
+ for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
+ xfs_trans_bjoin(*tp, dop->dop_bufs[i]);
+ xfs_trans_bhold(*tp, dop->dop_bufs[i]);
+ }
+
return error;
}
@@ -295,6 +305,31 @@ xfs_defer_ijoin(
}
}
+ ASSERT(0);
+ return -EFSCORRUPTED;
+}
+
+/*
+ * Add this buffer to the deferred op. Each joined buffer is relogged
+ * each time we roll the transaction.
+ */
+int
+xfs_defer_bjoin(
+ struct xfs_defer_ops *dop,
+ struct xfs_buf *bp)
+{
+ int i;
+
+ for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) {
+ if (dop->dop_bufs[i] == bp)
+ return 0;
+ else if (dop->dop_bufs[i] == NULL) {
+ dop->dop_bufs[i] = bp;
+ return 0;
+ }
+ }
+
+ ASSERT(0);
return -EFSCORRUPTED;
}
@@ -493,9 +528,7 @@ xfs_defer_init(
struct xfs_defer_ops *dop,
xfs_fsblock_t *fbp)
{
- dop->dop_committed = false;
- dop->dop_low = false;
- memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
+ memset(dop, 0, sizeof(struct xfs_defer_ops));
*fbp = NULLFSBLOCK;
INIT_LIST_HEAD(&dop->dop_intake);
INIT_LIST_HEAD(&dop->dop_pending);
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index d4f046dd44bd..045beacdd37d 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -59,6 +59,7 @@ enum xfs_defer_ops_type {
};
#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */
+#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */
struct xfs_defer_ops {
bool dop_committed; /* did any trans commit? */
@@ -66,8 +67,9 @@ struct xfs_defer_ops {
struct list_head dop_intake; /* unlogged pending work */
struct list_head dop_pending; /* logged pending work */
- /* relog these inodes with each roll */
+ /* relog these with each roll */
struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES];
+ struct xfs_buf *dop_bufs[XFS_DEFER_OPS_NR_BUFS];
};
void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
@@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop);
void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
+int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
/* Description of a deferred type. */
struct xfs_defer_op_type {