diff options
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r-- | fs/io_uring.c | 116 |
1 files changed, 84 insertions, 32 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c index a340147387ec..e37b84146453 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -71,6 +71,7 @@ #include <linux/sizes.h> #include <linux/hugetlb.h> #include <linux/highmem.h> +#include <linux/fs_struct.h> #include <uapi/linux/io_uring.h> @@ -239,7 +240,7 @@ struct io_ring_ctx { struct user_struct *user; - struct cred *creds; + const struct cred *creds; struct completion ctx_done; @@ -334,6 +335,8 @@ struct io_kiocb { u32 result; u32 sequence; + struct fs_struct *fs; + struct work_struct work; }; @@ -651,6 +654,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, /* one is dropped after submission, the other at completion */ refcount_set(&req->refs, 2); req->result = 0; + req->fs = NULL; return req; out: percpu_ref_put(&ctx->refs); @@ -882,11 +886,17 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx) mutex_unlock(&ctx->uring_lock); } -static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, - long min) +static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, + long min) { int iters = 0, ret = 0; + /* + * We disallow the app entering submit/complete with polling, but we + * still need to lock the ring to prevent racing with polled issue + * that got punted to a workqueue. + */ + mutex_lock(&ctx->uring_lock); do { int tmin = 0; @@ -922,21 +932,6 @@ static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, ret = 0; } while (min && !*nr_events && !need_resched()); - return ret; -} - -static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, - long min) -{ - int ret; - - /* - * We disallow the app entering submit/complete with polling, but we - * still need to lock the ring to prevent racing with polled issue - * that got punted to a workqueue. - */ - mutex_lock(&ctx->uring_lock); - ret = __io_iopoll_check(ctx, nr_events, min); mutex_unlock(&ctx->uring_lock); return ret; } @@ -1662,6 +1657,11 @@ static int io_send_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe, else if (force_nonblock) flags |= MSG_DONTWAIT; +#ifdef CONFIG_COMPAT + if (req->ctx->compat) + flags |= MSG_CMSG_COMPAT; +#endif + msg = (struct user_msghdr __user *) (unsigned long) READ_ONCE(sqe->addr); @@ -1672,6 +1672,16 @@ static int io_send_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe, ret = -EINTR; } + if (req->fs) { + struct fs_struct *fs = req->fs; + + spin_lock(&req->fs->lock); + if (--fs->users) + fs = NULL; + spin_unlock(&req->fs->lock); + if (fs) + free_fs_struct(fs); + } io_cqring_add_event(req->ctx, sqe->user_data, ret); io_put_req(req); return 0; @@ -2168,6 +2178,7 @@ static inline bool io_sqe_needs_user(const struct io_uring_sqe *sqe) static void io_sq_wq_submit_work(struct work_struct *work) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); + struct fs_struct *old_fs_struct = current->fs; struct io_ring_ctx *ctx = req->ctx; struct mm_struct *cur_mm = NULL; struct async_list *async_list; @@ -2187,6 +2198,15 @@ restart: /* Ensure we clear previously set non-block flag */ req->rw.ki_flags &= ~IOCB_NOWAIT; + if (req->fs != current->fs && current->fs != old_fs_struct) { + task_lock(current); + if (req->fs) + current->fs = req->fs; + else + current->fs = old_fs_struct; + task_unlock(current); + } + ret = 0; if (io_sqe_needs_user(sqe) && !cur_mm) { if (!mmget_not_zero(ctx->sqo_mm)) { @@ -2285,6 +2305,11 @@ out: mmput(cur_mm); } revert_creds(old_cred); + if (old_fs_struct) { + task_lock(current); + current->fs = old_fs_struct; + task_unlock(current); + } } /* @@ -2512,6 +2537,23 @@ err: req->user_data = s->sqe->user_data; +#if defined(CONFIG_NET) + switch (READ_ONCE(s->sqe->opcode)) { + case IORING_OP_SENDMSG: + case IORING_OP_RECVMSG: + spin_lock(¤t->fs->lock); + if (!current->fs->in_exec) { + req->fs = current->fs; + req->fs->users++; + } + spin_unlock(¤t->fs->lock); + if (!req->fs) { + ret = -EAGAIN; + goto err_req; + } + } +#endif + /* * If we already have a head request, queue this one for async * submittal once the head completes. If we don't have a head but @@ -2721,7 +2763,7 @@ static int io_sq_thread(void *data) */ mutex_lock(&ctx->uring_lock); if (!list_empty(&ctx->poll_list)) - __io_iopoll_check(ctx, &nr_events, 0); + io_iopoll_getevents(ctx, &nr_events, 0); else inflight = 0; mutex_unlock(&ctx->uring_lock); @@ -2741,16 +2783,6 @@ static int io_sq_thread(void *data) to_submit = io_sqring_entries(ctx); if (!to_submit) { /* - * We're polling. If we're within the defined idle - * period, then let us spin without work before going - * to sleep. - */ - if (inflight || !time_after(jiffies, timeout)) { - cond_resched(); - continue; - } - - /* * Drop cur_mm before scheduling, we can't hold it for * long periods (or over schedule()). Do this before * adding ourselves to the waitqueue, as the unuse/drop @@ -2762,6 +2794,16 @@ static int io_sq_thread(void *data) cur_mm = NULL; } + /* + * We're polling. If we're within the defined idle + * period, then let us spin without work before going + * to sleep. + */ + if (inflight || !time_after(jiffies, timeout)) { + cond_resched(); + continue; + } + prepare_to_wait(&ctx->sqo_wait, &wait, TASK_INTERRUPTIBLE); @@ -3721,6 +3763,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, mutex_lock(&ctx->uring_lock); submitted = io_ring_submit(ctx, to_submit); mutex_unlock(&ctx->uring_lock); + + if (submitted != to_submit) + goto out; } if (flags & IORING_ENTER_GETEVENTS) { unsigned nr_events = 0; @@ -3734,6 +3779,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, } } +out: percpu_ref_put(&ctx->refs); out_fput: fdput(f); @@ -3773,12 +3819,18 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx, ctx->cq_entries = rings->cq_ring_entries; size = array_size(sizeof(struct io_uring_sqe), p->sq_entries); - if (size == SIZE_MAX) + if (size == SIZE_MAX) { + io_mem_free(ctx->rings); + ctx->rings = NULL; return -EOVERFLOW; + } ctx->sq_sqes = io_mem_alloc(size); - if (!ctx->sq_sqes) + if (!ctx->sq_sqes) { + io_mem_free(ctx->rings); + ctx->rings = NULL; return -ENOMEM; + } return 0; } @@ -3870,7 +3922,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p) ctx->account_mem = account_mem; ctx->user = user; - ctx->creds = prepare_creds(); + ctx->creds = get_current_cred(); if (!ctx->creds) { ret = -ENOMEM; goto err; |