summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/callback.c3
-rw-r--r--fs/afs/fsclient.c6
-rw-r--r--fs/afs/internal.h4
-rw-r--r--fs/afs/server.c1
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/afs/yfsclient.c2
-rw-r--r--fs/aio.c200
-rw-r--r--fs/btrfs/transaction.c49
-rw-r--r--fs/ceph/dir.c6
-rw-r--r--fs/ceph/inode.c18
-rw-r--r--fs/ceph/mds_client.c70
-rw-r--r--fs/ceph/snap.c7
-rw-r--r--fs/cifs/cifsfs.c2
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/file.c45
-rw-r--r--fs/cifs/inode.c4
-rw-r--r--fs/cifs/misc.c48
-rw-r--r--fs/cifs/smb2maperror.c3
-rw-r--r--fs/cifs/smb2misc.c6
-rw-r--r--fs/cifs/smb2ops.c12
-rw-r--r--fs/cifs/smb2pdu.c12
-rw-r--r--fs/cifs/trace.h6
-rw-r--r--fs/dax.c15
-rw-r--r--fs/debugfs/inode.c13
-rw-r--r--fs/ext4/ioctl.c7
-rw-r--r--fs/ext4/resize.c17
-rw-r--r--fs/ext4/super.c16
-rw-r--r--fs/ext4/xattr.c3
-rw-r--r--fs/f2fs/checkpoint.c9
-rw-r--r--fs/f2fs/data.c38
-rw-r--r--fs/f2fs/dir.c2
-rw-r--r--fs/f2fs/f2fs.h34
-rw-r--r--fs/f2fs/file.c7
-rw-r--r--fs/f2fs/gc.c2
-rw-r--r--fs/f2fs/node.c6
-rw-r--r--fs/f2fs/segment.c22
-rw-r--r--fs/f2fs/super.c44
-rw-r--r--fs/f2fs/xattr.c10
-rw-r--r--fs/fuse/dev.c12
-rw-r--r--fs/hugetlbfs/inode.c20
-rw-r--r--fs/jffs2/readinode.c5
-rw-r--r--fs/jffs2/super.c5
-rw-r--r--fs/kernfs/dir.c5
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/nfs42proc.c3
-rw-r--r--fs/nfs/nfs4file.c4
-rw-r--r--fs/nfs/super.c3
-rw-r--r--fs/nfsd/nfs4callback.c8
-rw-r--r--fs/nfsd/nfs4state.c12
-rw-r--r--fs/nfsd/state.h1
-rw-r--r--fs/notify/inotify/inotify_user.c7
-rw-r--r--fs/open.c18
-rw-r--r--fs/pipe.c4
-rw-r--r--fs/proc/kcore.c27
-rw-r--r--fs/proc/proc_sysctl.c6
-rw-r--r--fs/proc/task_mmu.c18
-rw-r--r--fs/read_write.c5
-rw-r--r--fs/splice.c16
-rw-r--r--fs/userfaultfd.c9
59 files changed, 652 insertions, 290 deletions
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 1c7955f5cdaf..128f2dbe256a 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -203,8 +203,7 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi)
*/
void afs_init_callback_state(struct afs_server *server)
{
- if (!test_and_clear_bit(AFS_SERVER_FL_NEW, &server->flags))
- server->cb_s_break++;
+ server->cb_s_break++;
}
/*
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index ca08c83168f5..0b37867b5c20 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -1515,8 +1515,8 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr)
xdr_encode_AFS_StoreStatus(&bp, attr);
- *bp++ = 0; /* position of start of write */
- *bp++ = 0;
+ *bp++ = htonl(attr->ia_size >> 32); /* position of start of write */
+ *bp++ = htonl((u32) attr->ia_size);
*bp++ = 0; /* size of write */
*bp++ = 0;
*bp++ = htonl(attr->ia_size >> 32); /* new file length */
@@ -1564,7 +1564,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
xdr_encode_AFS_StoreStatus(&bp, attr);
- *bp++ = 0; /* position of start of write */
+ *bp++ = htonl(attr->ia_size); /* position of start of write */
*bp++ = 0; /* size of write */
*bp++ = htonl(attr->ia_size); /* new file length */
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 8871b9e8645f..465526f495b0 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -475,7 +475,6 @@ struct afs_server {
time64_t put_time; /* Time at which last put */
time64_t update_at; /* Time at which to next update the record */
unsigned long flags;
-#define AFS_SERVER_FL_NEW 0 /* New server, don't inc cb_s_break */
#define AFS_SERVER_FL_NOT_READY 1 /* The record is not ready for use */
#define AFS_SERVER_FL_NOT_FOUND 2 /* VL server says no such server */
#define AFS_SERVER_FL_VL_FAIL 3 /* Failed to access VL server */
@@ -828,7 +827,7 @@ static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest
static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode)
{
- return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break;
+ return vnode->cb_break + vnode->cb_v_break;
}
static inline bool afs_cb_is_broken(unsigned int cb_break,
@@ -836,7 +835,6 @@ static inline bool afs_cb_is_broken(unsigned int cb_break,
const struct afs_cb_interest *cbi)
{
return !cbi || cb_break != (vnode->cb_break +
- cbi->server->cb_s_break +
vnode->volume->cb_v_break);
}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 642afa2e9783..65b33b6da48b 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -226,7 +226,6 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
RCU_INIT_POINTER(server->addresses, alist);
server->addr_version = alist->version;
server->uuid = *uuid;
- server->flags = (1UL << AFS_SERVER_FL_NEW);
server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
rwlock_init(&server->fs_lock);
INIT_HLIST_HEAD(&server->cb_volumes);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 72efcfcf9f95..0122d7445fba 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -264,6 +264,7 @@ static void afs_kill_pages(struct address_space *mapping,
first = page->index + 1;
lock_page(page);
generic_error_remove_page(mapping, page);
+ unlock_page(page);
}
__pagevec_release(&pv);
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 5aa57929e8c2..6e97a42d24d1 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -1514,7 +1514,7 @@ static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
bp = xdr_encode_u32(bp, 0); /* RPC flags */
bp = xdr_encode_YFSFid(bp, &vnode->fid);
bp = xdr_encode_YFS_StoreStatus(bp, attr);
- bp = xdr_encode_u64(bp, 0); /* position of start of write */
+ bp = xdr_encode_u64(bp, attr->ia_size); /* position of start of write */
bp = xdr_encode_u64(bp, 0); /* size of write */
bp = xdr_encode_u64(bp, attr->ia_size); /* new file length */
yfs_check_req(call, bp);
diff --git a/fs/aio.c b/fs/aio.c
index 3d9669d011b9..efa13410e04e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -181,7 +181,7 @@ struct poll_iocb {
struct file *file;
struct wait_queue_head *head;
__poll_t events;
- bool woken;
+ bool done;
bool cancelled;
struct wait_queue_entry wait;
struct work_struct work;
@@ -204,8 +204,7 @@ struct aio_kiocb {
struct kioctx *ki_ctx;
kiocb_cancel_fn *ki_cancel;
- struct iocb __user *ki_user_iocb; /* user's aiocb */
- __u64 ki_user_data; /* user's data for completion */
+ struct io_event ki_res;
struct list_head ki_list; /* the aio core uses this
* for cancellation */
@@ -1022,6 +1021,9 @@ static bool get_reqs_available(struct kioctx *ctx)
/* aio_get_req
* Allocate a slot for an aio request.
* Returns NULL if no requests are free.
+ *
+ * The refcount is initialized to 2 - one for the async op completion,
+ * one for the synchronous code that does this.
*/
static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
{
@@ -1034,7 +1036,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
percpu_ref_get(&ctx->reqs);
req->ki_ctx = ctx;
INIT_LIST_HEAD(&req->ki_list);
- refcount_set(&req->ki_refcnt, 0);
+ refcount_set(&req->ki_refcnt, 2);
req->ki_eventfd = NULL;
return req;
}
@@ -1067,30 +1069,18 @@ out:
return ret;
}
-static inline void iocb_put(struct aio_kiocb *iocb)
+static inline void iocb_destroy(struct aio_kiocb *iocb)
{
- if (refcount_read(&iocb->ki_refcnt) == 0 ||
- refcount_dec_and_test(&iocb->ki_refcnt)) {
- if (iocb->ki_filp)
- fput(iocb->ki_filp);
- percpu_ref_put(&iocb->ki_ctx->reqs);
- kmem_cache_free(kiocb_cachep, iocb);
- }
-}
-
-static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb,
- long res, long res2)
-{
- ev->obj = (u64)(unsigned long)iocb->ki_user_iocb;
- ev->data = iocb->ki_user_data;
- ev->res = res;
- ev->res2 = res2;
+ if (iocb->ki_filp)
+ fput(iocb->ki_filp);
+ percpu_ref_put(&iocb->ki_ctx->reqs);
+ kmem_cache_free(kiocb_cachep, iocb);
}
/* aio_complete
* Called when the io request on the given iocb is complete.
*/
-static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
+static void aio_complete(struct aio_kiocb *iocb)
{
struct kioctx *ctx = iocb->ki_ctx;
struct aio_ring *ring;
@@ -1114,14 +1104,14 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
- aio_fill_event(event, iocb, res, res2);
+ *event = iocb->ki_res;
kunmap_atomic(ev_page);
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
- pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
- ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
- res, res2);
+ pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
+ (void __user *)(unsigned long)iocb->ki_res.obj,
+ iocb->ki_res.data, iocb->ki_res.res, iocb->ki_res.res2);
/* after flagging the request as done, we
* must never even look at it again
@@ -1163,7 +1153,14 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
if (waitqueue_active(&ctx->wait))
wake_up(&ctx->wait);
- iocb_put(iocb);
+}
+
+static inline void iocb_put(struct aio_kiocb *iocb)
+{
+ if (refcount_dec_and_test(&iocb->ki_refcnt)) {
+ aio_complete(iocb);
+ iocb_destroy(iocb);
+ }
}
/* aio_read_events_ring
@@ -1437,7 +1434,9 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
file_end_write(kiocb->ki_filp);
}
- aio_complete(iocb, res, res2);
+ iocb->ki_res.res = res;
+ iocb->ki_res.res2 = res2;
+ iocb_put(iocb);
}
static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
@@ -1585,11 +1584,10 @@ static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
static void aio_fsync_work(struct work_struct *work)
{
- struct fsync_iocb *req = container_of(work, struct fsync_iocb, work);
- int ret;
+ struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work);
- ret = vfs_fsync(req->file, req->datasync);
- aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
+ iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync);
+ iocb_put(iocb);
}
static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
@@ -1608,11 +1606,6 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
return 0;
}
-static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
-{
- aio_complete(iocb, mangle_poll(mask), 0);
-}
-
static void aio_poll_complete_work(struct work_struct *work)
{
struct poll_iocb *req = container_of(work, struct poll_iocb, work);
@@ -1638,9 +1631,11 @@ static void aio_poll_complete_work(struct work_struct *work)
return;
}
list_del_init(&iocb->ki_list);
+ iocb->ki_res.res = mangle_poll(mask);
+ req->done = true;
spin_unlock_irq(&ctx->ctx_lock);
- aio_poll_complete(iocb, mask);
+ iocb_put(iocb);
}
/* assumes we are called with irqs disabled */
@@ -1668,31 +1663,27 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
__poll_t mask = key_to_poll(key);
unsigned long flags;
- req->woken = true;
-
/* for instances that support it check for an event match first: */
- if (mask) {
- if (!(mask & req->events))
- return 0;
+ if (mask && !(mask & req->events))
+ return 0;
+
+ list_del_init(&req->wait.entry);
+ if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
/*
* Try to complete the iocb inline if we can. Use
* irqsave/irqrestore because not all filesystems (e.g. fuse)
* call this function with IRQs disabled and because IRQs
* have to be disabled before ctx_lock is obtained.
*/
- if (spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
- list_del(&iocb->ki_list);
- spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
-
- list_del_init(&req->wait.entry);
- aio_poll_complete(iocb, mask);
- return 1;
- }
+ list_del(&iocb->ki_list);
+ iocb->ki_res.res = mangle_poll(mask);
+ req->done = true;
+ spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
+ iocb_put(iocb);
+ } else {
+ schedule_work(&req->work);
}
-
- list_del_init(&req->wait.entry);
- schedule_work(&req->work);
return 1;
}
@@ -1724,6 +1715,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
struct kioctx *ctx = aiocb->ki_ctx;
struct poll_iocb *req = &aiocb->poll;
struct aio_poll_table apt;
+ bool cancel = false;
__poll_t mask;
/* reject any unknown events outside the normal event mask. */
@@ -1737,7 +1729,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
req->head = NULL;
- req->woken = false;
+ req->done = false;
req->cancelled = false;
apt.pt._qproc = aio_poll_queue_proc;
@@ -1749,41 +1741,34 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
INIT_LIST_HEAD(&req->wait.entry);
init_waitqueue_func_entry(&req->wait, aio_poll_wake);
- /* one for removal from waitqueue, one for this function */
- refcount_set(&aiocb->ki_refcnt, 2);
-
mask = vfs_poll(req->file, &apt.pt) & req->events;
- if (unlikely(!req->head)) {
- /* we did not manage to set up a waitqueue, done */
- goto out;
- }
-
spin_lock_irq(&ctx->ctx_lock);
- spin_lock(&req->head->lock);
- if (req->woken) {
- /* wake_up context handles the rest */
- mask = 0;
+ if (likely(req->head)) {
+ spin_lock(&req->head->lock);
+ if (unlikely(list_empty(&req->wait.entry))) {
+ if (apt.error)
+ cancel = true;
+ apt.error = 0;
+ mask = 0;
+ }
+ if (mask || apt.error) {
+ list_del_init(&req->wait.entry);
+ } else if (cancel) {
+ WRITE_ONCE(req->cancelled, true);
+ } else if (!req->done) { /* actually waiting for an event */
+ list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+ aiocb->ki_cancel = aio_poll_cancel;
+ }
+ spin_unlock(&req->head->lock);
+ }
+ if (mask) { /* no async, we'd stolen it */
+ aiocb->ki_res.res = mangle_poll(mask);
apt.error = 0;
- } else if (mask || apt.error) {
- /* if we get an error or a mask we are done */
- WARN_ON_ONCE(list_empty(&req->wait.entry));
- list_del_init(&req->wait.entry);
- } else {
- /* actually waiting for an event */
- list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
- aiocb->ki_cancel = aio_poll_cancel;
}
- spin_unlock(&req->head->lock);
spin_unlock_irq(&ctx->ctx_lock);
-
-out:
- if (unlikely(apt.error))
- return apt.error;
-
if (mask)
- aio_poll_complete(aiocb, mask);
- iocb_put(aiocb);
- return 0;
+ iocb_put(aiocb);
+ return apt.error;
}
static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
@@ -1842,8 +1827,10 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
goto out_put_req;
}
- req->ki_user_iocb = user_iocb;
- req->ki_user_data = iocb->aio_data;
+ req->ki_res.obj = (u64)(unsigned long)user_iocb;
+ req->ki_res.data = iocb->aio_data;
+ req->ki_res.res = 0;
+ req->ki_res.res2 = 0;
switch (iocb->aio_lio_opcode) {
case IOCB_CMD_PREAD:
@@ -1873,18 +1860,21 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
break;
}
+ /* Done with the synchronous reference */
+ iocb_put(req);
+
/*
* If ret is 0, we'd either done aio_complete() ourselves or have
* arranged for that to be done asynchronously. Anything non-zero
* means that we need to destroy req ourselves.
*/
- if (ret)
- goto out_put_req;
- return 0;
+ if (!ret)
+ return 0;
+
out_put_req:
if (req->ki_eventfd)
eventfd_ctx_put(req->ki_eventfd);
- iocb_put(req);
+ iocb_destroy(req);
out_put_reqs_available:
put_reqs_available(ctx, 1);
return ret;
@@ -1997,24 +1987,6 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
}
#endif
-/* lookup_kiocb
- * Finds a given iocb for cancellation.
- */
-static struct aio_kiocb *
-lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb)
-{
- struct aio_kiocb *kiocb;
-
- assert_spin_locked(&ctx->ctx_lock);
-
- /* TODO: use a hash or array, this sucks. */
- list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
- if (kiocb->ki_user_iocb == iocb)
- return kiocb;
- }
- return NULL;
-}
-
/* sys_io_cancel:
* Attempts to cancel an iocb previously passed to io_submit. If
* the operation is successfully cancelled, the resulting event is
@@ -2032,6 +2004,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
struct aio_kiocb *kiocb;
int ret = -EINVAL;
u32 key;
+ u64 obj = (u64)(unsigned long)iocb;
if (unlikely(get_user(key, &iocb->aio_key)))
return -EFAULT;
@@ -2043,10 +2016,13 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
return -EINVAL;
spin_lock_irq(&ctx->ctx_lock);
- kiocb = lookup_kiocb(ctx, iocb);
- if (kiocb) {
- ret = kiocb->ki_cancel(&kiocb->rw);
- list_del_init(&kiocb->ki_list);
+ /* TODO: use a hash or array, this sucks. */
+ list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
+ if (kiocb->ki_res.obj == obj) {
+ ret = kiocb->ki_cancel(&kiocb->rw);
+ list_del_init(&kiocb->ki_list);
+ break;
+ }
}
spin_unlock_irq(&ctx->ctx_lock);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 4ec2b660d014..7f3ece91a4d0 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1886,8 +1886,10 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
}
}
-static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
+static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+
/*
* We use writeback_inodes_sb here because if we used
* btrfs_start_delalloc_roots we would deadlock with fs freeze.
@@ -1897,15 +1899,50 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
* from already being in a transaction and our join_transaction doesn't
* have to re-take the fs freeze lock.
*/
- if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
+ if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
+ } else {
+ struct btrfs_pending_snapshot *pending;
+ struct list_head *head = &trans->transaction->pending_snapshots;
+
+ /*
+ * Flush dellaloc for any root that is going to be snapshotted.
+ * This is done to avoid a corrupted version of files, in the
+ * snapshots, that had both buffered and direct IO writes (even
+ * if they were done sequentially) due to an unordered update of
+ * the inode's size on disk.
+ */
+ list_for_each_entry(pending, head, list) {
+ int ret;
+
+ ret = btrfs_start_delalloc_snapshot(pending->root);
+ if (ret)
+ return ret;
+ }
+ }
return 0;
}
-static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
+static inline void btrfs_wait_delalloc_flush(struct btrfs_trans_handle *trans)
{
- if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+
+ if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
+ } else {
+ struct btrfs_pending_snapshot *pending;
+ struct list_head *head = &trans->transaction->pending_snapshots;
+
+ /*
+ * Wait for any dellaloc that we started previously for the roots
+ * that are going to be snapshotted. This is to avoid a corrupted
+ * version of files in the snapshots that had both buffered and
+ * direct IO writes (even if they were done sequentially).
+ */
+ list_for_each_entry(pending, head, list)
+ btrfs_wait_ordered_extents(pending->root,
+ U64_MAX, 0, U64_MAX);
+ }
}
int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
@@ -2024,7 +2061,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
extwriter_counter_dec(cur_trans, trans->type);
- ret = btrfs_start_delalloc_flush(fs_info);
+ ret = btrfs_start_delalloc_flush(trans);
if (ret)
goto cleanup_transaction;
@@ -2040,7 +2077,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
if (ret)
goto cleanup_transaction;
- btrfs_wait_delalloc_flush(fs_info);
+ btrfs_wait_delalloc_flush(trans);
btrfs_scrub_pause(fs_info);
/*
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 82928cea0209..7f3f64ba464f 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1470,6 +1470,7 @@ void ceph_dentry_lru_del(struct dentry *dn)
unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
{
struct ceph_inode_info *dci = ceph_inode(dir);
+ unsigned hash;
switch (dci->i_dir_layout.dl_dir_hash) {
case 0: /* for backward compat */
@@ -1477,8 +1478,11 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
return dn->d_name.hash;
default:
- return ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
+ spin_lock(&dn->d_lock);
+ hash = ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
dn->d_name.name, dn->d_name.len);
+ spin_unlock(&dn->d_lock);
+ return hash;
}
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 9d1f34d46627..fd3db2e112d6 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -524,6 +524,7 @@ static void ceph_i_callback(struct rcu_head *head)
struct inode *inode = container_of(head, struct inode, i_rcu);
struct ceph_inode_info *ci = ceph_inode(inode);
+ kfree(ci->i_symlink);
kmem_cache_free(ceph_inode_cachep, ci);
}
@@ -561,7 +562,6 @@ void ceph_destroy_inode(struct inode *inode)
ceph_put_snap_realm(mdsc, realm);
}
- kfree(ci->i_symlink);
while ((n = rb_first(&ci->i_fragtree)) != NULL) {
frag = rb_entry(n, struct ceph_inode_frag, node);
rb_erase(n, &ci->i_fragtree);
@@ -1152,6 +1152,19 @@ static int splice_dentry(struct dentry **pdn, struct inode *in)
return 0;
}
+static int d_name_cmp(struct dentry *dentry, const char *name, size_t len)
+{
+ int ret;
+
+ /* take d_lock to ensure dentry->d_name stability */
+ spin_lock(&dentry->d_lock);
+ ret = dentry->d_name.len - len;
+ if (!ret)
+ ret = memcmp(dentry->d_name.name, name, len);
+ spin_unlock(&dentry->d_lock);
+ return ret;
+}
+
/*
* Incorporate results into the local cache. This is either just
* one inode, or a directory, dentry, and possibly linked-to inode (e.g.,
@@ -1401,7 +1414,8 @@ retry_lookup:
err = splice_dentry(&req->r_dentry, in);
if (err < 0)
goto done;
- } else if (rinfo->head->is_dentry) {
+ } else if (rinfo->head->is_dentry &&
+ !d_name_cmp(req->r_dentry, rinfo->dname, rinfo->dname_len)) {
struct ceph_vino *ptvino = NULL;
if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) ||
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 163fc74bf221..5cec784e30f6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1286,6 +1286,15 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
ci->i_prealloc_cap_flush = NULL;
}
+
+ if (drop &&
+ ci->i_wrbuffer_ref_head == 0 &&
+ ci->i_wr_ref == 0 &&
+ ci->i_dirty_caps == 0 &&
+ ci->i_flushing_caps == 0) {
+ ceph_put_snap_context(ci->i_head_snapc);
+ ci->i_head_snapc = NULL;
+ }
}
spin_unlock(&ci->i_ceph_lock);
while (!list_empty(&to_remove)) {
@@ -1958,10 +1967,39 @@ retry:
return path;
}
+/* Duplicate the dentry->d_name.name safely */
+static int clone_dentry_name(struct dentry *dentry, const char **ppath,
+ int *ppathlen)
+{
+ u32 len;
+ char *name;
+
+retry:
+ len = READ_ONCE(dentry->d_name.len);
+ name = kmalloc(len + 1, GFP_NOFS);
+ if (!name)
+ return -ENOMEM;
+
+ spin_lock(&dentry->d_lock);
+ if (dentry->d_name.len != len) {
+ spin_unlock(&dentry->d_lock);
+ kfree(name);
+ goto retry;
+ }
+ memcpy(name, dentry->d_name.name, len);
+ spin_unlock(&dentry->d_lock);
+
+ name[len] = '\0';
+ *ppath = name;
+ *ppathlen = len;
+ return 0;
+}
+
static int build_dentry_path(struct dentry *dentry, struct inode *dir,
const char **ppath, int *ppathlen, u64 *pino,
- int *pfreepath)
+ bool *pfreepath, bool parent_locked)
{
+ int ret;
char *path;
rcu_read_lock();
@@ -1970,8 +2008,15 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
*pino = ceph_ino(dir);
rcu_read_unlock();
- *ppath = dentry->d_name.name;
- *ppathlen = dentry->d_name.len;
+ if (parent_locked) {
+ *ppath = dentry->d_name.name;
+ *ppathlen = dentry->d_name.len;
+ } else {
+ ret = clone_dentry_name(dentry, ppath, ppathlen);
+ if (ret)
+ return ret;
+ *pfreepath = true;
+ }
return 0;
}
rcu_read_unlock();
@@ -1979,13 +2024,13 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
if (IS_ERR(path))
return PTR_ERR(path);
*ppath = path;
- *pfreepath = 1;
+ *pfreepath = true;
return 0;
}
static int build_inode_path(struct inode *inode,
const char **ppath, int *ppathlen, u64 *pino,
- int *pfreepath)
+ bool *pfreepath)
{
struct dentry *dentry;
char *path;
@@ -2001,7 +2046,7 @@ static int build_inode_path(struct inode *inode,
if (IS_ERR(path))
return PTR_ERR(path);
*ppath = path;
- *pfreepath = 1;
+ *pfreepath = true;
return 0;
}
@@ -2012,7 +2057,7 @@ static int build_inode_path(struct inode *inode,
static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
struct inode *rdiri, const char *rpath,
u64 rino, const char **ppath, int *pathlen,
- u64 *ino, int *freepath)
+ u64 *ino, bool *freepath, bool parent_locked)
{
int r = 0;
@@ -2022,7 +2067,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
ceph_snap(rinode));
} else if (rdentry) {
r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino,
- freepath);
+ freepath, parent_locked);
dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
*ppath);
} else if (rpath || rino) {
@@ -2048,7 +2093,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
const char *path2 = NULL;
u64 ino1 = 0, ino2 = 0;
int pathlen1 = 0, pathlen2 = 0;
- int freepath1 = 0, freepath2 = 0;
+ bool freepath1 = false, freepath2 = false;
int len;
u16 releases;
void *p, *end;
@@ -2056,16 +2101,19 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
ret = set_request_path_attr(req->r_inode, req->r_dentry,
req->r_parent, req->r_path1, req->r_ino1.ino,
- &path1, &pathlen1, &ino1, &freepath1);
+ &path1, &pathlen1, &ino1, &freepath1,
+ test_bit(CEPH_MDS_R_PARENT_LOCKED,
+ &req->r_req_flags));
if (ret < 0) {
msg = ERR_PTR(ret);
goto out;
}
+ /* If r_old_dentry is set, then assume that its parent is locked */
ret = set_request_path_attr(NULL, req->r_old_dentry,
req->r_old_dentry_dir,
req->r_path2, req->r_ino2.ino,
- &path2, &pathlen2, &ino2, &freepath2);
+ &path2, &pathlen2, &ino2, &freepath2, true);
if (ret < 0) {
msg = ERR_PTR(ret);
goto out_free1;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index f74193da0e09..1f46b02f7314 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -568,7 +568,12 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
old_snapc = NULL;
update_snapc:
- if (ci->i_head_snapc) {
+ if (ci->i_wrbuffer_ref_head == 0 &&
+ ci->i_wr_ref == 0 &&
+ ci->i_dirty_caps == 0 &&
+ ci->i_flushing_caps == 0) {
+ ci->i_head_snapc = NULL;
+ } else {
ci->i_head_snapc = ceph_get_snap_context(new_snapc);
dout(" new snapc is %p\n", new_snapc);
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 07cad54b84f1..e8e125acd712 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1010,7 +1010,7 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
unsigned int xid;
int rc;
- if (remap_flags & ~REMAP_FILE_ADVISORY)
+ if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
return -EINVAL;
cifs_dbg(FYI, "clone range\n");
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 6c934ab3722b..10ead04346ee 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1303,6 +1303,7 @@ cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file)
}
struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file);
+void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_hdlr);
void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
#define CIFS_CACHE_READ_FLG 1
@@ -1824,6 +1825,7 @@ GLOBAL_EXTERN spinlock_t gidsidlock;
#endif /* CONFIG_CIFS_ACL */
void cifs_oplock_break(struct work_struct *work);
+void cifs_queue_oplock_break(struct cifsFileInfo *cfile);
extern const struct slow_work_ops cifs_oplock_break_ops;
extern struct workqueue_struct *cifsiod_wq;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8d107587208f..7c3f9d00586e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -360,13 +360,31 @@ cifsFileInfo_get(struct cifsFileInfo *cifs_file)
return cifs_file;
}
-/*
- * Release a reference on the file private data. This may involve closing
- * the filehandle out on the server. Must be called without holding
- * tcon->open_file_lock and cifs_file->file_info_lock.
+/**
+ * cifsFileInfo_put - release a reference of file priv data
+ *
+ * Always potentially wait for oplock handler. See _cifsFileInfo_put().
*/
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
+ _cifsFileInfo_put(cifs_file, true);
+}
+
+/**
+ * _cifsFileInfo_put - release a reference of file priv data
+ *
+ * This may involve closing the filehandle @cifs_file out on the
+ * server. Must be called without holding tcon->open_file_lock and
+ * cifs_file->file_info_lock.
+ *
+ * If @wait_for_oplock_handler is true and we are releasing the last
+ * reference, wait for any running oplock break handler of the file
+ * and cancel any pending one. If calling this function from the
+ * oplock break handler, you need to pass false.
+ *
+ */
+void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
+{
struct inode *inode = d_inode(cifs_file->dentry);
struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
@@ -414,7 +432,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
spin_unlock(&tcon->open_file_lock);
- oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
+ oplock_break_cancelled = wait_oplock_handler ?
+ cancel_work_sync(&cifs_file->oplock_break) : false;
if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
struct TCP_Server_Info *server = tcon->ses->server;
@@ -2777,7 +2796,6 @@ static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
struct cifs_tcon *tcon;
struct cifs_sb_info *cifs_sb;
struct dentry *dentry = ctx->cfile->dentry;
- unsigned int i;
int rc;
tcon = tlink_tcon(ctx->cfile->tlink);
@@ -2841,10 +2859,6 @@ restart_loop:
kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
- if (!ctx->direct_io)
- for (i = 0; i < ctx->npages; i++)
- put_page(ctx->bv[i].bv_page);
-
cifs_stats_bytes_written(tcon, ctx->total_len);
set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
@@ -3453,7 +3467,6 @@ collect_uncached_read_data(struct cifs_aio_ctx *ctx)
struct iov_iter *to = &ctx->iter;
struct cifs_sb_info *cifs_sb;
struct cifs_tcon *tcon;
- unsigned int i;
int rc;
tcon = tlink_tcon(ctx->cfile->tlink);
@@ -3537,15 +3550,8 @@ again:
kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
- if (!ctx->direct_io) {
- for (i = 0; i < ctx->npages; i++) {
- if (ctx->should_dirty)
- set_page_dirty(ctx->bv[i].bv_page);
- put_page(ctx->bv[i].bv_page);
- }
-
+ if (!ctx->direct_io)
ctx->total_len = ctx->len - iov_iter_count(to);
- }
cifs_stats_bytes_read(tcon, ctx->total_len);
@@ -4480,6 +4486,7 @@ void cifs_oplock_break(struct work_struct *work)
cinode);
cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
}
+ _cifsFileInfo_put(cfile, false /* do not wait for ourself */);
cifs_done_oplock_break(cinode);
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 53fdb5df0d2e..538fd7d807e4 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1735,6 +1735,10 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry,
if (rc == 0 || rc != -EBUSY)
goto do_rename_exit;
+ /* Don't fall back to using SMB on SMB 2+ mount */
+ if (server->vals->protocol_id != 0)
+ goto do_rename_exit;
+
/* open-file renames don't work across directories */
if (to_dentry->d_parent != from_dentry->d_parent)
goto do_rename_exit;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index bee203055b30..0dc6f08020ac 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -501,8 +501,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
&pCifsInode->flags);
- queue_work(cifsoplockd_wq,
- &netfile->oplock_break);
+ cifs_queue_oplock_break(netfile);
netfile->oplock_break_cancelled = false;
spin_unlock(&tcon->open_file_lock);
@@ -607,6 +606,28 @@ void cifs_put_writer(struct cifsInodeInfo *cinode)
spin_unlock(&cinode->writers_lock);
}
+/**
+ * cifs_queue_oplock_break - queue the oplock break handler for cfile
+ *
+ * This function is called from the demultiplex thread when it
+ * receives an oplock break for @cfile.
+ *
+ * Assumes the tcon->open_file_lock is held.
+ * Assumes cfile->file_info_lock is NOT held.
+ */
+void cifs_queue_oplock_break(struct cifsFileInfo *cfile)
+{
+ /*
+ * Bump the handle refcount now while we hold the
+ * open_file_lock to enforce the validity of it for the oplock
+ * break handler. The matching put is done at the end of the
+ * handler.
+ */
+ cifsFileInfo_get(cfile);
+
+ queue_work(cifsoplockd_wq, &cfile->oplock_break);
+}
+
void cifs_done_oplock_break(struct cifsInodeInfo *cinode)
{
clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags);
@@ -768,6 +789,11 @@ cifs_aio_ctx_alloc(void)
{
struct cifs_aio_ctx *ctx;
+ /*
+ * Must use kzalloc to initialize ctx->bv to NULL and ctx->direct_io
+ * to false so that we know when we have to unreference pages within
+ * cifs_aio_ctx_release()
+ */
ctx = kzalloc(sizeof(struct cifs_aio_ctx), GFP_KERNEL);
if (!ctx)
return NULL;
@@ -786,7 +812,23 @@ cifs_aio_ctx_release(struct kref *refcount)
struct cifs_aio_ctx, refcount);
cifsFileInfo_put(ctx->cfile);
- kvfree(ctx->bv);
+
+ /*
+ * ctx->bv is only set if setup_aio_ctx_iter() was call successfuly
+ * which means that iov_iter_get_pages() was a success and thus that
+ * we have taken reference on pages.
+ */
+ if (ctx->bv) {
+ unsigned i;
+
+ for (i = 0; i < ctx->npages; i++) {
+ if (ctx->should_dirty)
+ set_page_dirty(ctx->bv[i].bv_page);
+ put_page(ctx->bv[i].bv_page);
+ }
+ kvfree(ctx->bv);
+ }
+
kfree(ctx);
}
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 924269cec135..e32c264e3adb 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -1036,7 +1036,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_UNFINISHED_CONTEXT_DELETED, -EIO,
"STATUS_UNFINISHED_CONTEXT_DELETED"},
{STATUS_NO_TGT_REPLY, -EIO, "STATUS_NO_TGT_REPLY"},
- {STATUS_OBJECTID_NOT_FOUND, -EIO, "STATUS_OBJECTID_NOT_FOUND"},
+ /* Note that ENOATTTR and ENODATA are the same errno */
+ {STATUS_OBJECTID_NOT_FOUND, -ENODATA, "STATUS_OBJECTID_NOT_FOUND"},
{STATUS_NO_IP_ADDRESSES, -EIO, "STATUS_NO_IP_ADDRESSES"},
{STATUS_WRONG_CREDENTIAL_HANDLE, -EIO,
"STATUS_WRONG_CREDENTIAL_HANDLE"},
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 58700d2ba8cd..0a7ed2e3ad4f 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -555,7 +555,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
&cinode->flags);
- queue_work(cifsoplockd_wq, &cfile->oplock_break);
+ cifs_queue_oplock_break(cfile);
kfree(lw);
return true;
}
@@ -719,8 +719,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
&cinode->flags);
spin_unlock(&cfile->file_info_lock);
- queue_work(cifsoplockd_wq,
- &cfile->oplock_break);
+
+ cifs_queue_oplock_break(cfile);
spin_unlock(&tcon->open_file_lock);
spin_unlock(&cifs_tcp_ses_lock);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index b29f711ab965..d5434ac0571b 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -949,6 +949,16 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER;
memset(rsp_iov, 0, sizeof(rsp_iov));
+ if (ses->server->ops->query_all_EAs) {
+ if (!ea_value) {
+ rc = ses->server->ops->query_all_EAs(xid, tcon, path,
+ ea_name, NULL, 0,
+ cifs_sb);
+ if (rc == -ENODATA)
+ goto sea_exit;
+ }
+ }
+
/* Open */
memset(&open_iov, 0, sizeof(open_iov));
rqst[0].rq_iov = open_iov;
@@ -2200,6 +2210,8 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_iov,
&resp_buftype);
+ if (!rc)
+ SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
if (!rc || !err_iov.iov_base) {
rc = -ENOENT;
goto free_path;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 068febe37fe4..85a3c051e622 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -815,8 +815,11 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
} else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
/* ops set to 3.0 by default for default so update */
ses->server->ops = &smb21_operations;
- } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID))
+ ses->server->vals = &smb21_values;
+ } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) {
ses->server->ops = &smb311_operations;
+ ses->server->vals = &smb311_values;
+ }
} else if (le16_to_cpu(rsp->DialectRevision) !=
ses->server->vals->protocol_id) {
/* if requested single dialect ensure returned dialect matched */
@@ -3387,8 +3390,6 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
rqst.rq_nvec = 1;
rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
- cifs_small_buf_release(req);
-
rsp = (struct smb2_read_rsp *)rsp_iov.iov_base;
if (rc) {
@@ -3401,12 +3402,15 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
rc);
}
free_rsp_buf(resp_buftype, rsp_iov.iov_base);
+ cifs_small_buf_release(req);
return rc == -ENODATA ? 0 : rc;
} else
trace_smb3_read_done(xid, req->PersistentFileId,
io_parms->tcon->tid, ses->Suid,
io_parms->offset, io_parms->length);
+ cifs_small_buf_release(req);
+
*nbytes = le32_to_cpu(rsp->DataLength);
if ((*nbytes > CIFS_MAX_MSGSIZE) ||
(*nbytes > io_parms->length)) {
@@ -3705,7 +3709,6 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
rc = cifs_send_recv(xid, io_parms->tcon->ses, &rqst,
&resp_buftype, flags, &rsp_iov);
- cifs_small_buf_release(req);
rsp = (struct smb2_write_rsp *)rsp_iov.iov_base;
if (rc) {
@@ -3723,6 +3726,7 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
io_parms->offset, *nbytes);
}
+ cifs_small_buf_release(req);
free_rsp_buf(resp_buftype, rsp);
return rc;
}
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index 59be48206932..b49bc925fb4f 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -378,19 +378,19 @@ DECLARE_EVENT_CLASS(smb3_tcon_class,
__field(unsigned int, xid)
__field(__u32, tid)
__field(__u64, sesid)
- __field(const char *, unc_name)
+ __string(name, unc_name)
__field(int, rc)
),
TP_fast_assign(
__entry->xid = xid;
__entry->tid = tid;
__entry->sesid = sesid;
- __entry->unc_name = unc_name;
+ __assign_str(name, unc_name);
__entry->rc = rc;
),
TP_printk("xid=%u sid=0x%llx tid=0x%x unc_name=%s rc=%d",
__entry->xid, __entry->sesid, __entry->tid,
- __entry->unc_name, __entry->rc)
+ __get_str(name), __entry->rc)
)
#define DEFINE_SMB3_TCON_EVENT(name) \
diff --git a/fs/dax.c b/fs/dax.c
index 05cca2214ae3..827ee143413e 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -33,6 +33,7 @@
#include <linux/sizes.h>
#include <linux/mmu_notifier.h>
#include <linux/iomap.h>
+#include <asm/pgalloc.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
@@ -1409,7 +1410,9 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
unsigned long pmd_addr = vmf->address & PMD_MASK;
+ struct vm_area_struct *vma = vmf->vma;
struct inode *inode = mapping->host;
+ pgtable_t pgtable = NULL;
struct page *zero_page;
spinlock_t *ptl;
pmd_t pmd_entry;
@@ -1424,12 +1427,22 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
DAX_PMD | DAX_ZERO_PAGE, false);
+ if (arch_needs_pgtable_deposit()) {
+ pgtable = pte_alloc_one(vma->vm_mm);
+ if (!pgtable)
+ return VM_FAULT_OOM;
+ }
+
ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
if (!pmd_none(*(vmf->pmd))) {
spin_unlock(ptl);
goto fallback;
}
+ if (pgtable) {
+ pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
+ mm_inc_nr_ptes(vma->vm_mm);
+ }
pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
pmd_entry = pmd_mkhuge(pmd_entry);
set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
@@ -1438,6 +1451,8 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
return VM_FAULT_NOPAGE;
fallback:
+ if (pgtable)
+ pte_free(vma->vm_mm, pgtable);
trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry);
return VM_FAULT_FALLBACK;
}
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 29c68c5d44d5..c4a4fc6f1a95 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -163,19 +163,24 @@ static int debugfs_show_options(struct seq_file *m, struct dentry *root)
return 0;
}
-static void debugfs_evict_inode(struct inode *inode)
+static void debugfs_i_callback(struct rcu_head *head)
{
- truncate_inode_pages_final(&inode->i_data);
- clear_inode(inode);
+ struct inode *inode = container_of(head, struct inode, i_rcu);
if (S_ISLNK(inode->i_mode))
kfree(inode->i_link);
+ free_inode_nonrcu(inode);
+}
+
+static void debugfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, debugfs_i_callback);
}
static const struct super_operations debugfs_super_operations = {
.statfs = simple_statfs,
.remount_fs = debugfs_remount,
.show_options = debugfs_show_options,
- .evict_inode = debugfs_evict_inode,
+ .destroy_inode = debugfs_destroy_inode,
};
static void debugfs_release_dentry(struct dentry *dentry)
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 2e76fb55d94a..5f24fdc140ad 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -999,6 +999,13 @@ resizefs_out:
if (!blk_queue_discard(q))
return -EOPNOTSUPP;
+ /*
+ * We haven't replayed the journal, so we cannot use our
+ * block-bitmap-guided storage zapping commands.
+ */
+ if (test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb))
+ return -EROFS;
+
if (copy_from_user(&range, (struct fstrim_range __user *)arg,
sizeof(range)))
return -EFAULT;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 3d9b18505c0c..e7ae26e36c9c 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -932,11 +932,18 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
memcpy(n_group_desc, o_group_desc,
EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
n_group_desc[gdb_num] = gdb_bh;
+
+ BUFFER_TRACE(gdb_bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, gdb_bh);
+ if (err) {
+ kvfree(n_group_desc);
+ brelse(gdb_bh);
+ return err;
+ }
+
EXT4_SB(sb)->s_group_desc = n_group_desc;
EXT4_SB(sb)->s_gdb_count++;
kvfree(o_group_desc);
- BUFFER_TRACE(gdb_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, gdb_bh);
return err;
}
@@ -2073,6 +2080,10 @@ out:
free_flex_gd(flex_gd);
if (resize_inode != NULL)
iput(resize_inode);
- ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", n_blocks_count);
+ if (err)
+ ext4_warning(sb, "error (%d) occurred during "
+ "file system resize", err);
+ ext4_msg(sb, KERN_INFO, "resized filesystem to %llu",
+ ext4_blocks_count(es));
return err;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index fb12d3c17c1b..b9bca7298f96 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -430,6 +430,12 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
spin_unlock(&sbi->s_md_lock);
}
+static bool system_going_down(void)
+{
+ return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
+ || system_state == SYSTEM_RESTART;
+}
+
/* Deal with the reporting of failure conditions on a filesystem such as
* inconsistencies detected or read IO failures.
*
@@ -460,7 +466,12 @@ static void ext4_handle_error(struct super_block *sb)
if (journal)
jbd2_journal_abort(journal, -EIO);
}
- if (test_opt(sb, ERRORS_RO)) {
+ /*
+ * We force ERRORS_RO behavior when system is rebooting. Otherwise we
+ * could panic during 'reboot -f' as the underlying device got already
+ * disabled.
+ */
+ if (test_opt(sb, ERRORS_RO) || system_going_down()) {
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
/*
* Make sure updated value of ->s_mount_flags will be visible
@@ -468,8 +479,7 @@ static void ext4_handle_error(struct super_block *sb)
*/
smp_wmb();
sb->s_flags |= SB_RDONLY;
- }
- if (test_opt(sb, ERRORS_PANIC)) {
+ } else if (test_opt(sb, ERRORS_PANIC)) {
if (EXT4_SB(sb)->s_journal &&
!(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
return;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 86ed9c686249..dc82e7757f67 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -829,6 +829,7 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage)
bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
if (IS_ERR(bh)) {
ret = PTR_ERR(bh);
+ bh = NULL;
goto out;
}
@@ -2903,6 +2904,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
if (error == -EIO)
EXT4_ERROR_INODE(inode, "block %llu read error",
EXT4_I(inode)->i_file_acl);
+ bh = NULL;
goto cleanup;
}
error = ext4_xattr_check_block(inode, bh);
@@ -3059,6 +3061,7 @@ ext4_xattr_block_cache_find(struct inode *inode,
if (IS_ERR(bh)) {
if (PTR_ERR(bh) == -ENOMEM)
return NULL;
+ bh = NULL;
EXT4_ERROR_INODE(inode, "block %lu read error",
(unsigned long)ce->e_value);
} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f955cd3e0677..7743fa83b895 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -306,8 +306,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
goto skip_write;
/* collect a number of dirty meta pages and write together */
- if (wbc->for_kupdate ||
- get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
+ if (wbc->sync_mode != WB_SYNC_ALL &&
+ get_pages(sbi, F2FS_DIRTY_META) <
+ nr_pages_to_skip(sbi, META))
goto skip_write;
/* if locked failed, cp will flush dirty pages instead */
@@ -405,7 +406,7 @@ static int f2fs_set_meta_page_dirty(struct page *page)
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
- SetPagePrivate(page);
+ f2fs_set_page_private(page, 0);
f2fs_trace_pid(page);
return 1;
}
@@ -956,7 +957,7 @@ void f2fs_update_dirty_page(struct inode *inode, struct page *page)
inode_inc_dirty_pages(inode);
spin_unlock(&sbi->inode_lock[type]);
- SetPagePrivate(page);
+ f2fs_set_page_private(page, 0);
f2fs_trace_pid(page);
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index f91d8630c9a2..6e598dd85fb5 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -218,12 +218,14 @@ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
struct block_device *bdev = sbi->sb->s_bdev;
int i;
- for (i = 0; i < sbi->s_ndevs; i++) {
- if (FDEV(i).start_blk <= blk_addr &&
- FDEV(i).end_blk >= blk_addr) {
- blk_addr -= FDEV(i).start_blk;
- bdev = FDEV(i).bdev;
- break;
+ if (f2fs_is_multi_device(sbi)) {
+ for (i = 0; i < sbi->s_ndevs; i++) {
+ if (FDEV(i).start_blk <= blk_addr &&
+ FDEV(i).end_blk >= blk_addr) {
+ blk_addr -= FDEV(i).start_blk;
+ bdev = FDEV(i).bdev;
+ break;
+ }
}
}
if (bio) {
@@ -237,6 +239,9 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
{
int i;
+ if (!f2fs_is_multi_device(sbi))
+ return 0;
+
for (i = 0; i < sbi->s_ndevs; i++)
if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
return i;
@@ -2711,8 +2716,7 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
if (IS_ATOMIC_WRITTEN_PAGE(page))
return f2fs_drop_inmem_page(inode, page);
- set_page_private(page, 0);
- ClearPagePrivate(page);
+ f2fs_clear_page_private(page);
}
int f2fs_release_page(struct page *page, gfp_t wait)
@@ -2726,8 +2730,7 @@ int f2fs_release_page(struct page *page, gfp_t wait)
return 0;
clear_cold_data(page);
- set_page_private(page, 0);
- ClearPagePrivate(page);
+ f2fs_clear_page_private(page);
return 1;
}
@@ -2795,12 +2798,8 @@ int f2fs_migrate_page(struct address_space *mapping,
return -EAGAIN;
}
- /*
- * A reference is expected if PagePrivate set when move mapping,
- * however F2FS breaks this for maintaining dirty page counts when
- * truncating pages. So here adjusting the 'extra_count' make it work.
- */
- extra_count = (atomic_written ? 1 : 0) - page_has_private(page);
+ /* one extra reference was held for atomic_write page */
+ extra_count = atomic_written ? 1 : 0;
rc = migrate_page_move_mapping(mapping, newpage,
page, mode, extra_count);
if (rc != MIGRATEPAGE_SUCCESS) {
@@ -2821,9 +2820,10 @@ int f2fs_migrate_page(struct address_space *mapping,
get_page(newpage);
}
- if (PagePrivate(page))
- SetPagePrivate(newpage);
- set_page_private(newpage, page_private(page));
+ if (PagePrivate(page)) {
+ f2fs_set_page_private(newpage, page_private(page));
+ f2fs_clear_page_private(page);
+ }
if (mode != MIGRATE_SYNC_NO_COPY)
migrate_page_copy(newpage, page);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 50d0d36280fa..99a6063c2327 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -728,7 +728,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
!f2fs_truncate_hole(dir, page->index, page->index + 1)) {
f2fs_clear_page_cache_dirty_tag(page);
clear_page_dirty_for_io(page);
- ClearPagePrivate(page);
+ f2fs_clear_page_private(page);
ClearPageUptodate(page);
clear_cold_data(page);
inode_dec_dirty_pages(dir);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 279bc00489cc..48f1bbf3e87e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1364,6 +1364,17 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
}
#endif
+/*
+ * Test if the mounted volume is a multi-device volume.
+ * - For a single regular disk volume, sbi->s_ndevs is 0.
+ * - For a single zoned disk volume, sbi->s_ndevs is 1.
+ * - For a multi-device volume, sbi->s_ndevs is always 2 or more.
+ */
+static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi)
+{
+ return sbi->s_ndevs > 1;
+}
+
/* For write statistics. Suppose sector size is 512 bytes,
* and the return value is in kbytes. s is of struct f2fs_sb_info.
*/
@@ -2825,6 +2836,27 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
return true;
}
+static inline void f2fs_set_page_private(struct page *page,
+ unsigned long data)
+{
+ if (PagePrivate(page))
+ return;
+
+ get_page(page);
+ SetPagePrivate(page);
+ set_page_private(page, data);
+}
+
+static inline void f2fs_clear_page_private(struct page *page)
+{
+ if (!PagePrivate(page))
+ return;
+
+ set_page_private(page, 0);
+ ClearPagePrivate(page);
+ f2fs_put_page(page, 0);
+}
+
/*
* file.c
*/
@@ -3591,7 +3623,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
if (f2fs_post_read_required(inode))
return true;
- if (sbi->s_ndevs)
+ if (f2fs_is_multi_device(sbi))
return true;
/*
* for blkzoned device, fallback direct IO to buffered IO, so
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index ae2b45e75847..bc078aea60ae 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -768,7 +768,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int err;
- bool size_changed = false;
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
return -EIO;
@@ -843,8 +842,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
down_write(&F2FS_I(inode)->i_sem);
F2FS_I(inode)->last_disk_size = i_size_read(inode);
up_write(&F2FS_I(inode)->i_sem);
-
- size_changed = true;
}
__setattr_copy(inode, attr);
@@ -858,7 +855,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
}
/* file size may changed here */
- f2fs_mark_inode_dirty_sync(inode, size_changed);
+ f2fs_mark_inode_dirty_sync(inode, true);
/* inode change will produce dirty node pages flushed by checkpoint */
f2fs_balance_fs(F2FS_I_SB(inode), true);
@@ -2573,7 +2570,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
sizeof(range)))
return -EFAULT;
- if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num ||
+ if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
__is_large_section(sbi)) {
f2fs_msg(sbi->sb, KERN_WARNING,
"Can't flush %u in %d for segs_per_sec %u != 1\n",
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 195cf0f9d9ef..ab764bd106de 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1346,7 +1346,7 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES;
/* give warm/cold data area from slower device */
- if (sbi->s_ndevs && !__is_large_section(sbi))
+ if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi))
SIT_I(sbi)->last_victim[ALLOC_NEXT] =
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 4f450e573312..3f99ab288695 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1920,7 +1920,9 @@ static int f2fs_write_node_pages(struct address_space *mapping,
f2fs_balance_fs_bg(sbi);
/* collect a number of dirty node pages and write together */
- if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
+ if (wbc->sync_mode != WB_SYNC_ALL &&
+ get_pages(sbi, F2FS_DIRTY_NODES) <
+ nr_pages_to_skip(sbi, NODE))
goto skip_write;
if (wbc->sync_mode == WB_SYNC_ALL)
@@ -1959,7 +1961,7 @@ static int f2fs_set_node_page_dirty(struct page *page)
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
- SetPagePrivate(page);
+ f2fs_set_page_private(page, 0);
f2fs_trace_pid(page);
return 1;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index e1b1d390b329..2b809b54d81b 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -191,8 +191,7 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page)
f2fs_trace_pid(page);
- set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
- SetPagePrivate(page);
+ f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
@@ -280,8 +279,7 @@ next:
ClearPageUptodate(page);
clear_cold_data(page);
}
- set_page_private(page, 0);
- ClearPagePrivate(page);
+ f2fs_clear_page_private(page);
f2fs_put_page(page, 1);
list_del(&cur->list);
@@ -370,8 +368,7 @@ void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
kmem_cache_free(inmem_entry_slab, cur);
ClearPageUptodate(page);
- set_page_private(page, 0);
- ClearPagePrivate(page);
+ f2fs_clear_page_private(page);
f2fs_put_page(page, 0);
trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
@@ -579,7 +576,7 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
int ret = 0;
int i;
- if (!sbi->s_ndevs)
+ if (!f2fs_is_multi_device(sbi))
return __submit_flush_wait(sbi, sbi->sb->s_bdev);
for (i = 0; i < sbi->s_ndevs; i++) {
@@ -647,7 +644,8 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
return ret;
}
- if (atomic_inc_return(&fcc->queued_flush) == 1 || sbi->s_ndevs > 1) {
+ if (atomic_inc_return(&fcc->queued_flush) == 1 ||
+ f2fs_is_multi_device(sbi)) {
ret = submit_flush_wait(sbi, ino);
atomic_dec(&fcc->queued_flush);
@@ -753,7 +751,7 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
{
int ret = 0, i;
- if (!sbi->s_ndevs)
+ if (!f2fs_is_multi_device(sbi))
return 0;
for (i = 1; i < sbi->s_ndevs; i++) {
@@ -1362,7 +1360,7 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
trace_f2fs_queue_discard(bdev, blkstart, blklen);
- if (sbi->s_ndevs) {
+ if (f2fs_is_multi_device(sbi)) {
int devi = f2fs_target_device_index(sbi, blkstart);
blkstart -= FDEV(devi).start_blk;
@@ -1717,7 +1715,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
block_t lblkstart = blkstart;
int devi = 0;
- if (sbi->s_ndevs) {
+ if (f2fs_is_multi_device(sbi)) {
devi = f2fs_target_device_index(sbi, blkstart);
blkstart -= FDEV(devi).start_blk;
}
@@ -3074,7 +3072,7 @@ static void update_device_state(struct f2fs_io_info *fio)
struct f2fs_sb_info *sbi = fio->sbi;
unsigned int devidx;
- if (!sbi->s_ndevs)
+ if (!f2fs_is_multi_device(sbi))
return;
devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 5892fa3c885f..144ffba3ec5a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1460,9 +1460,16 @@ static int f2fs_enable_quotas(struct super_block *sb);
static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
{
+ unsigned int s_flags = sbi->sb->s_flags;
struct cp_control cpc;
- int err;
+ int err = 0;
+ int ret;
+ if (s_flags & SB_RDONLY) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "checkpoint=disable on readonly fs");
+ return -EINVAL;
+ }
sbi->sb->s_flags |= SB_ACTIVE;
f2fs_update_time(sbi, DISABLE_TIME);
@@ -1470,18 +1477,24 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
while (!f2fs_time_over(sbi, DISABLE_TIME)) {
mutex_lock(&sbi->gc_mutex);
err = f2fs_gc(sbi, true, false, NULL_SEGNO);
- if (err == -ENODATA)
+ if (err == -ENODATA) {
+ err = 0;
break;
+ }
if (err && err != -EAGAIN)
- return err;
+ break;
}
- err = sync_filesystem(sbi->sb);
- if (err)
- return err;
+ ret = sync_filesystem(sbi->sb);
+ if (ret || err) {
+ err = ret ? ret: err;
+ goto restore_flag;
+ }
- if (f2fs_disable_cp_again(sbi))
- return -EAGAIN;
+ if (f2fs_disable_cp_again(sbi)) {
+ err = -EAGAIN;
+ goto restore_flag;
+ }
mutex_lock(&sbi->gc_mutex);
cpc.reason = CP_PAUSE;
@@ -1490,7 +1503,9 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
sbi->unusable_block_count = 0;
mutex_unlock(&sbi->gc_mutex);
- return 0;
+restore_flag:
+ sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
+ return err;
}
static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
@@ -3359,7 +3374,7 @@ skip_recovery:
if (test_opt(sbi, DISABLE_CHECKPOINT)) {
err = f2fs_disable_checkpoint(sbi);
if (err)
- goto free_meta;
+ goto sync_free_meta;
} else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) {
f2fs_enable_checkpoint(sbi);
}
@@ -3372,7 +3387,7 @@ skip_recovery:
/* After POR, we can run background GC thread.*/
err = f2fs_start_gc_thread(sbi);
if (err)
- goto free_meta;
+ goto sync_free_meta;
}
kvfree(options);
@@ -3394,6 +3409,11 @@ skip_recovery:
f2fs_update_time(sbi, REQ_TIME);
return 0;
+sync_free_meta:
+ /* safe to flush all the data */
+ sync_filesystem(sbi->sb);
+ retry = false;
+
free_meta:
#ifdef CONFIG_QUOTA
f2fs_truncate_quota_inode_pages(sb);
@@ -3407,6 +3427,8 @@ free_meta:
* falls into an infinite loop in f2fs_sync_meta_pages().
*/
truncate_inode_pages_final(META_MAPPING(sbi));
+ /* evict some inodes being cached by GC */
+ evict_inodes(sb);
f2fs_unregister_sysfs(sbi);
free_root_inode:
dput(sb->s_root);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 73b92985198b..6b6fe6431a64 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -347,7 +347,7 @@ check:
*base_addr = txattr_addr;
return 0;
out:
- kzfree(txattr_addr);
+ kvfree(txattr_addr);
return err;
}
@@ -390,7 +390,7 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage,
*base_addr = txattr_addr;
return 0;
fail:
- kzfree(txattr_addr);
+ kvfree(txattr_addr);
return err;
}
@@ -517,7 +517,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
}
error = size;
out:
- kzfree(base_addr);
+ kvfree(base_addr);
return error;
}
@@ -563,7 +563,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
}
error = buffer_size - rest;
cleanup:
- kzfree(base_addr);
+ kvfree(base_addr);
return error;
}
@@ -694,7 +694,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
if (!error && S_ISDIR(inode->i_mode))
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP);
exit:
- kzfree(base_addr);
+ kvfree(base_addr);
return error;
}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 809c0f2f9942..64f4de983468 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2034,10 +2034,8 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
ret = -EINVAL;
- if (rem < len) {
- pipe_unlock(pipe);
- goto out;
- }
+ if (rem < len)
+ goto out_free;
rem = len;
while (rem) {
@@ -2055,7 +2053,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
pipe->nrbufs--;
} else {
- pipe_buf_get(pipe, ibuf);
+ if (!pipe_buf_get(pipe, ibuf))
+ goto out_free;
+
*obuf = *ibuf;
obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
obuf->len = rem;
@@ -2078,11 +2078,11 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
ret = fuse_dev_do_write(fud, &cs, len);
pipe_lock(pipe);
+out_free:
for (idx = 0; idx < nbuf; idx++)
pipe_buf_release(pipe, &bufs[idx]);
pipe_unlock(pipe);
-out:
kvfree(bufs);
return ret;
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a7fa037b876b..a3a3d256fb0e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -741,11 +741,17 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
umode_t mode, dev_t dev)
{
struct inode *inode;
- struct resv_map *resv_map;
+ struct resv_map *resv_map = NULL;
- resv_map = resv_map_alloc();
- if (!resv_map)
- return NULL;
+ /*
+ * Reserve maps are only needed for inodes that can have associated
+ * page allocations.
+ */
+ if (S_ISREG(mode) || S_ISLNK(mode)) {
+ resv_map = resv_map_alloc();
+ if (!resv_map)
+ return NULL;
+ }
inode = new_inode(sb);
if (inode) {
@@ -780,8 +786,10 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
break;
}
lockdep_annotate_inode_mutex_key(inode);
- } else
- kref_put(&resv_map->refs, resv_map_release);
+ } else {
+ if (resv_map)
+ kref_put(&resv_map->refs, resv_map_release);
+ }
return inode;
}
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 389ea53ea487..bccfc40b3a74 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -1414,11 +1414,6 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
jffs2_kill_fragtree(&f->fragtree, deleted?c:NULL);
- if (f->target) {
- kfree(f->target);
- f->target = NULL;
- }
-
fds = f->dents;
while(fds) {
fd = fds;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index bb6ae387469f..05d892c79339 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -47,7 +47,10 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb)
static void jffs2_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
- kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
+ struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
+
+ kfree(f->target);
+ kmem_cache_free(jffs2_inode_cachep, f);
}
static void jffs2_destroy_inode(struct inode *inode)
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 4ca0b5c18192..853a69e493f5 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -650,11 +650,10 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
kn->id.generation = gen;
/*
- * set ino first. This barrier is paired with atomic_inc_not_zero in
+ * set ino first. This RELEASE is paired with atomic_inc_not_zero in
* kernfs_find_and_get_node_by_ino
*/
- smp_mb__before_atomic();
- atomic_set(&kn->count, 1);
+ atomic_set_release(&kn->count, 1);
atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
RB_CLEAR_NODE(&kn->rb);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index fb1cf1a4bda2..90d71fda65ce 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -453,7 +453,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
case XPRT_TRANSPORT_RDMA:
if (retrans == NFS_UNSPEC_RETRANS)
to->to_retries = NFS_DEF_TCP_RETRANS;
- if (timeo == NFS_UNSPEC_TIMEO || to->to_retries == 0)
+ if (timeo == NFS_UNSPEC_TIMEO || to->to_initval == 0)
to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10;
if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
to->to_initval = NFS_MAX_TCP_TIMEOUT;
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index fed06fd9998d..94f98e190e63 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -329,9 +329,6 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
};
ssize_t err, err2;
- if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY))
- return -EOPNOTSUPP;
-
src_lock = nfs_get_lock_context(nfs_file_open_context(src));
if (IS_ERR(src_lock))
return PTR_ERR(src_lock);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 45b2322e092d..00d17198ee12 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -133,8 +133,10 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t count, unsigned int flags)
{
+ if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY))
+ return -EOPNOTSUPP;
if (file_inode(file_in) == file_inode(file_out))
- return -EINVAL;
+ return -EOPNOTSUPP;
return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 0570391eaa16..15c025c1a305 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2041,7 +2041,8 @@ static int nfs23_validate_mount_data(void *options,
memcpy(sap, &data->addr, sizeof(data->addr));
args->nfs_server.addrlen = sizeof(data->addr);
args->nfs_server.port = ntohs(data->addr.sin_port);
- if (!nfs_verify_server_address(sap))
+ if (sap->sa_family != AF_INET ||
+ !nfs_verify_server_address(sap))
goto out_no_address;
if (!(data->flags & NFS_MOUNT_TCP))
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index c74e4538d0eb..258f741d6a21 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1023,8 +1023,9 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
cb->cb_seq_status = 1;
cb->cb_status = 0;
if (minorversion) {
- if (!nfsd41_cb_get_slot(clp, task))
+ if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task))
return;
+ cb->cb_holds_slot = true;
}
rpc_call_start(task);
}
@@ -1051,6 +1052,9 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
return true;
}
+ if (!cb->cb_holds_slot)
+ goto need_restart;
+
switch (cb->cb_seq_status) {
case 0:
/*
@@ -1089,6 +1093,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
cb->cb_seq_status);
}
+ cb->cb_holds_slot = false;
clear_bit(0, &clp->cl_cb_slot_busy);
rpc_wake_up_next(&clp->cl_cb_waitq);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
@@ -1296,6 +1301,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
cb->cb_seq_status = 1;
cb->cb_status = 0;
cb->cb_need_restart = false;
+ cb->cb_holds_slot = false;
}
void nfsd4_run_cb(struct nfsd4_callback *cb)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6a45fb00c5fc..f056b1d3fecd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -265,6 +265,7 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
static void
free_blocked_lock(struct nfsd4_blocked_lock *nbl)
{
+ locks_delete_block(&nbl->nbl_lock);
locks_release_private(&nbl->nbl_lock);
kfree(nbl);
}
@@ -293,11 +294,18 @@ remove_blocked_locks(struct nfs4_lockowner *lo)
nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock,
nbl_lru);
list_del_init(&nbl->nbl_lru);
- locks_delete_block(&nbl->nbl_lock);
free_blocked_lock(nbl);
}
}
+static void
+nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb)
+{
+ struct nfsd4_blocked_lock *nbl = container_of(cb,
+ struct nfsd4_blocked_lock, nbl_cb);
+ locks_delete_block(&nbl->nbl_lock);
+}
+
static int
nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
@@ -325,6 +333,7 @@ nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb)
}
static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
+ .prepare = nfsd4_cb_notify_lock_prepare,
.done = nfsd4_cb_notify_lock_done,
.release = nfsd4_cb_notify_lock_release,
};
@@ -4863,7 +4872,6 @@ nfs4_laundromat(struct nfsd_net *nn)
nbl = list_first_entry(&reaplist,
struct nfsd4_blocked_lock, nbl_lru);
list_del_init(&nbl->nbl_lru);
- locks_delete_block(&nbl->nbl_lock);
free_blocked_lock(nbl);
}
out:
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 396c76755b03..9d6cb246c6c5 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -70,6 +70,7 @@ struct nfsd4_callback {
int cb_seq_status;
int cb_status;
bool cb_need_restart;
+ bool cb_holds_slot;
};
struct nfsd4_callback_ops {
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 798f1253141a..3b7b8e95c98a 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -519,8 +519,10 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
if (!fsn_mark)
return -ENOENT;
- else if (create)
- return -EEXIST;
+ else if (create) {
+ ret = -EEXIST;
+ goto out;
+ }
i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
@@ -548,6 +550,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
/* return the wd */
ret = i_mark->wd;
+out:
/* match the get from fsnotify_find_mark() */
fsnotify_put_mark(fsn_mark);
diff --git a/fs/open.c b/fs/open.c
index f1c2f855fd43..a00350018a47 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1215,3 +1215,21 @@ int nonseekable_open(struct inode *inode, struct file *filp)
}
EXPORT_SYMBOL(nonseekable_open);
+
+/*
+ * stream_open is used by subsystems that want stream-like file descriptors.
+ * Such file descriptors are not seekable and don't have notion of position
+ * (file.f_pos is always 0). Contrary to file descriptors of other regular
+ * files, .read() and .write() can run simultaneously.
+ *
+ * stream_open never fails and is marked to return int so that it could be
+ * directly used as file_operations.open .
+ */
+int stream_open(struct inode *inode, struct file *filp)
+{
+ filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS);
+ filp->f_mode |= FMODE_STREAM;
+ return 0;
+}
+
+EXPORT_SYMBOL(stream_open);
diff --git a/fs/pipe.c b/fs/pipe.c
index c51750ed4011..2a297bce381f 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -189,9 +189,9 @@ EXPORT_SYMBOL(generic_pipe_buf_steal);
* in the tee() system call, when we duplicate the buffers in one
* pipe into another.
*/
-void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
+bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
- get_page(buf->page);
+ return try_get_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_get);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index bbcc185062bb..d29d869abec1 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -54,6 +54,28 @@ static LIST_HEAD(kclist_head);
static DECLARE_RWSEM(kclist_lock);
static int kcore_need_update = 1;
+/*
+ * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
+ * Same as oldmem_pfn_is_ram in vmcore
+ */
+static int (*mem_pfn_is_ram)(unsigned long pfn);
+
+int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn))
+{
+ if (mem_pfn_is_ram)
+ return -EBUSY;
+ mem_pfn_is_ram = fn;
+ return 0;
+}
+
+static int pfn_is_ram(unsigned long pfn)
+{
+ if (mem_pfn_is_ram)
+ return mem_pfn_is_ram(pfn);
+ else
+ return 1;
+}
+
/* This doesn't grab kclist_lock, so it should only be used at init time. */
void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
int type)
@@ -465,6 +487,11 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
goto out;
}
m = NULL; /* skip the list anchor */
+ } else if (!pfn_is_ram(__pa(start) >> PAGE_SHIFT)) {
+ if (clear_user(buffer, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
} else if (m->type == KCORE_VMALLOC) {
vread(buf, (char *)start, tsz);
/* we have to zero-fill user buffer even if no read */
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index d65390727541..7325baa8f9d4 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1626,9 +1626,11 @@ static void drop_sysctl_table(struct ctl_table_header *header)
if (--header->nreg)
return;
- if (parent)
+ if (parent) {
put_links(header);
- start_unregistering(header);
+ start_unregistering(header);
+ }
+
if (!--header->count)
kfree_rcu(header, rcu);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 85b0ef890b28..91bd2ff0c62c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1141,6 +1141,24 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
count = -EINTR;
goto out_mm;
}
+ /*
+ * Avoid to modify vma->vm_flags
+ * without locked ops while the
+ * coredump reads the vm_flags.
+ */
+ if (!mmget_still_valid(mm)) {
+ /*
+ * Silently return "count"
+ * like if get_task_mm()
+ * failed. FIXME: should this
+ * function have returned
+ * -ESRCH if get_task_mm()
+ * failed like if
+ * get_proc_task() fails?
+ */
+ up_write(&mm->mmap_sem);
+ goto out_mm;
+ }
for (vma = mm->mmap; vma; vma = vma->vm_next) {
vma->vm_flags &= ~VM_SOFTDIRTY;
vma_set_page_prot(vma);
diff --git a/fs/read_write.c b/fs/read_write.c
index 27b69b85d49f..3d3194e32201 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -560,12 +560,13 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
static inline loff_t file_pos_read(struct file *file)
{
- return file->f_pos;
+ return file->f_mode & FMODE_STREAM ? 0 : file->f_pos;
}
static inline void file_pos_write(struct file *file, loff_t pos)
{
- file->f_pos = pos;
+ if ((file->f_mode & FMODE_STREAM) == 0)
+ file->f_pos = pos;
}
ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
diff --git a/fs/splice.c b/fs/splice.c
index 90c29675d573..c38c7e7a49c9 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -333,8 +333,8 @@ const struct pipe_buf_operations default_pipe_buf_ops = {
.get = generic_pipe_buf_get,
};
-static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
+int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
{
return 1;
}
@@ -1588,7 +1588,11 @@ retry:
* Get a reference to this pipe buffer,
* so we can copy the contents over.
*/
- pipe_buf_get(ipipe, ibuf);
+ if (!pipe_buf_get(ipipe, ibuf)) {
+ if (ret == 0)
+ ret = -EFAULT;
+ break;
+ }
*obuf = *ibuf;
/*
@@ -1662,7 +1666,11 @@ static int link_pipe(struct pipe_inode_info *ipipe,
* Get a reference to this pipe buffer,
* so we can copy the contents over.
*/
- pipe_buf_get(ipipe, ibuf);
+ if (!pipe_buf_get(ipipe, ibuf)) {
+ if (ret == 0)
+ ret = -EFAULT;
+ break;
+ }
obuf = opipe->bufs + nbuf;
*obuf = *ibuf;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 89800fc7dc9d..f5de1e726356 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -629,6 +629,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
/* the various vma->vm_userfaultfd_ctx still points to it */
down_write(&mm->mmap_sem);
+ /* no task can run (and in turn coredump) yet */
+ VM_WARN_ON(!mmget_still_valid(mm));
for (vma = mm->mmap; vma; vma = vma->vm_next)
if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
@@ -883,6 +885,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
* taking the mmap_sem for writing.
*/
down_write(&mm->mmap_sem);
+ if (!mmget_still_valid(mm))
+ goto skip_mm;
prev = NULL;
for (vma = mm->mmap; vma; vma = vma->vm_next) {
cond_resched();
@@ -905,6 +909,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
vma->vm_flags = new_flags;
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
}
+skip_mm:
up_write(&mm->mmap_sem);
mmput(mm);
wakeup:
@@ -1333,6 +1338,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
goto out;
down_write(&mm->mmap_sem);
+ if (!mmget_still_valid(mm))
+ goto out_unlock;
vma = find_vma_prev(mm, start, &prev);
if (!vma)
goto out_unlock;
@@ -1520,6 +1527,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
goto out;
down_write(&mm->mmap_sem);
+ if (!mmget_still_valid(mm))
+ goto out_unlock;
vma = find_vma_prev(mm, start, &prev);
if (!vma)
goto out_unlock;