Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--  fs/io_uring.c  116
1 files changed, 84 insertions, 32 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a340147387ec..e37b84146453 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -71,6 +71,7 @@
#include <linux/sizes.h>
#include <linux/hugetlb.h>
#include <linux/highmem.h>
+#include <linux/fs_struct.h>
#include <uapi/linux/io_uring.h>
@@ -239,7 +240,7 @@ struct io_ring_ctx {
struct user_struct *user;
- struct cred *creds;
+ const struct cred *creds;
struct completion ctx_done;
@@ -334,6 +335,8 @@ struct io_kiocb {
u32 result;
u32 sequence;
+ struct fs_struct *fs;
+
struct work_struct work;
};
@@ -651,6 +654,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
/* one is dropped after submission, the other at completion */
refcount_set(&req->refs, 2);
req->result = 0;
+ req->fs = NULL;
return req;
out:
percpu_ref_put(&ctx->refs);
@@ -882,11 +886,17 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx)
mutex_unlock(&ctx->uring_lock);
}
-static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
- long min)
+static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
+ long min)
{
int iters = 0, ret = 0;
+ /*
+ * We disallow the app entering submit/complete with polling, but we
+ * still need to lock the ring to prevent racing with polled issue
+ * that got punted to a workqueue.
+ */
+ mutex_lock(&ctx->uring_lock);
do {
int tmin = 0;
@@ -922,21 +932,6 @@ static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
ret = 0;
} while (min && !*nr_events && !need_resched());
- return ret;
-}
-
-static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
- long min)
-{
- int ret;
-
- /*
- * We disallow the app entering submit/complete with polling, but we
- * still need to lock the ring to prevent racing with polled issue
- * that got punted to a workqueue.
- */
- mutex_lock(&ctx->uring_lock);
- ret = __io_iopoll_check(ctx, nr_events, min);
mutex_unlock(&ctx->uring_lock);
return ret;
}
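The two hunks above fold the old locking wrapper into io_iopoll_check() itself. A condensed sketch of the resulting function shape (the reaping loop body is elided here; it is not shown verbatim by the hunks):

static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
			   long min)
{
	int ret = 0;

	/*
	 * Polling callers are serialized, but the ring lock is still needed
	 * to avoid racing with polled issue that got punted to a workqueue.
	 */
	mutex_lock(&ctx->uring_lock);
	do {
		/* reap completions, e.g. via io_iopoll_getevents() */
	} while (min && !*nr_events && !need_resched());
	mutex_unlock(&ctx->uring_lock);
	return ret;
}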
@@ -1662,6 +1657,11 @@ static int io_send_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
else if (force_nonblock)
flags |= MSG_DONTWAIT;
+#ifdef CONFIG_COMPAT
+ if (req->ctx->compat)
+ flags |= MSG_CMSG_COMPAT;
+#endif
+
msg = (struct user_msghdr __user *) (unsigned long)
READ_ONCE(sqe->addr);
@@ -1672,6 +1672,16 @@ static int io_send_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
ret = -EINTR;
}
+ if (req->fs) {
+ struct fs_struct *fs = req->fs;
+
+ spin_lock(&req->fs->lock);
+ if (--fs->users)
+ fs = NULL;
+ spin_unlock(&req->fs->lock);
+ if (fs)
+ free_fs_struct(fs);
+ }
io_cqring_add_event(req->ctx, sqe->user_data, ret);
io_put_req(req);
return 0;
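The block added above drops the fs_struct reference that was taken at submission time once the sendmsg/recvmsg request completes. The same put pattern, written as a hypothetical standalone helper (the name io_drop_fs is not in the patch):

static void io_drop_fs(struct io_kiocb *req)
{
	struct fs_struct *fs = req->fs;

	if (!fs)
		return;
	/* Drop our reference; free the struct only if we held the last one. */
	spin_lock(&fs->lock);
	if (--fs->users)
		fs = NULL;
	spin_unlock(&fs->lock);
	if (fs)
		free_fs_struct(fs);
	req->fs = NULL;
}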
@@ -2168,6 +2178,7 @@ static inline bool io_sqe_needs_user(const struct io_uring_sqe *sqe)
static void io_sq_wq_submit_work(struct work_struct *work)
{
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+ struct fs_struct *old_fs_struct = current->fs;
struct io_ring_ctx *ctx = req->ctx;
struct mm_struct *cur_mm = NULL;
struct async_list *async_list;
@@ -2187,6 +2198,15 @@ restart:
/* Ensure we clear previously set non-block flag */
req->rw.ki_flags &= ~IOCB_NOWAIT;
+ if (req->fs != current->fs && current->fs != old_fs_struct) {
+ task_lock(current);
+ if (req->fs)
+ current->fs = req->fs;
+ else
+ current->fs = old_fs_struct;
+ task_unlock(current);
+ }
+
ret = 0;
if (io_sqe_needs_user(sqe) && !cur_mm) {
if (!mmget_not_zero(ctx->sqo_mm)) {
@@ -2285,6 +2305,11 @@ out:
mmput(cur_mm);
}
revert_creds(old_cred);
+ if (old_fs_struct) {
+ task_lock(current);
+ current->fs = old_fs_struct;
+ task_unlock(current);
+ }
}
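For the duration of a punted request, the worker above temporarily adopts the submitter's fs_struct and restores its own when done. A minimal sketch of the swap pattern, using a hypothetical helper name (io_set_current_fs is not in the patch):

static void io_set_current_fs(struct fs_struct *fs)
{
	/*
	 * current->fs can be read concurrently (e.g. by fork), so the
	 * pointer is only switched under task_lock().
	 */
	task_lock(current);
	current->fs = fs;
	task_unlock(current);
}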
/*
@@ -2512,6 +2537,23 @@ err:
req->user_data = s->sqe->user_data;
+#if defined(CONFIG_NET)
+ switch (READ_ONCE(s->sqe->opcode)) {
+ case IORING_OP_SENDMSG:
+ case IORING_OP_RECVMSG:
+ spin_lock(&current->fs->lock);
+ if (!current->fs->in_exec) {
+ req->fs = current->fs;
+ req->fs->users++;
+ }
+ spin_unlock(&current->fs->lock);
+ if (!req->fs) {
+ ret = -EAGAIN;
+ goto err_req;
+ }
+ }
+#endif
+
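The switch added above pins the submitter's fs_struct for IORING_OP_SENDMSG/IORING_OP_RECVMSG before the request can be punted, and fails the submission with -EAGAIN if the task is in the middle of exec. Roughly, as a hypothetical helper (io_grab_fs is not in the patch):

static int io_grab_fs(struct io_kiocb *req)
{
	int ret = -EAGAIN;

	spin_lock(&current->fs->lock);
	if (!current->fs->in_exec) {
		/* take a reference for the async worker to use later */
		req->fs = current->fs;
		req->fs->users++;
		ret = 0;
	}
	spin_unlock(&current->fs->lock);
	return ret;
}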
/*
* If we already have a head request, queue this one for async
* submittal once the head completes. If we don't have a head but
@@ -2721,7 +2763,7 @@ static int io_sq_thread(void *data)
*/
mutex_lock(&ctx->uring_lock);
if (!list_empty(&ctx->poll_list))
- __io_iopoll_check(ctx, &nr_events, 0);
+ io_iopoll_getevents(ctx, &nr_events, 0);
else
inflight = 0;
mutex_unlock(&ctx->uring_lock);
@@ -2741,16 +2783,6 @@ static int io_sq_thread(void *data)
to_submit = io_sqring_entries(ctx);
if (!to_submit) {
/*
- * We're polling. If we're within the defined idle
- * period, then let us spin without work before going
- * to sleep.
- */
- if (inflight || !time_after(jiffies, timeout)) {
- cond_resched();
- continue;
- }
-
- /*
* Drop cur_mm before scheduling, we can't hold it for
* long periods (or over schedule()). Do this before
* adding ourselves to the waitqueue, as the unuse/drop
@@ -2762,6 +2794,16 @@ static int io_sq_thread(void *data)
cur_mm = NULL;
}
+ /*
+ * We're polling. If we're within the defined idle
+ * period, then let us spin without work before going
+ * to sleep.
+ */
+ if (inflight || !time_after(jiffies, timeout)) {
+ cond_resched();
+ continue;
+ }
+
prepare_to_wait(&ctx->sqo_wait, &wait,
TASK_INTERRUPTIBLE);
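After this move, the idle-spin check in io_sq_thread() runs only once cur_mm has been dropped. A condensed sketch of the resulting !to_submit path (elided parts marked; not a verbatim copy of the file):

	to_submit = io_sqring_entries(ctx);
	if (!to_submit) {
		/* drop cur_mm first; it cannot be held over schedule() */
		if (cur_mm) {
			/* ... unuse/drop cur_mm, per the comment above ... */
			cur_mm = NULL;
		}

		/* only then spin through the idle period before sleeping */
		if (inflight || !time_after(jiffies, timeout)) {
			cond_resched();
			continue;
		}

		prepare_to_wait(&ctx->sqo_wait, &wait,
				TASK_INTERRUPTIBLE);
		/* ... */
	}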
@@ -3721,6 +3763,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
mutex_lock(&ctx->uring_lock);
submitted = io_ring_submit(ctx, to_submit);
mutex_unlock(&ctx->uring_lock);
+
+ if (submitted != to_submit)
+ goto out;
}
if (flags & IORING_ENTER_GETEVENTS) {
unsigned nr_events = 0;
@@ -3734,6 +3779,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
}
}
+out:
percpu_ref_put(&ctx->refs);
out_fput:
fdput(f);
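With the two hunks above, a short submission (submitted != to_submit) now skips the IORING_ENTER_GETEVENTS wait and returns the partial count. An illustrative userspace caller using the raw syscall (not from the patch; assumes __NR_io_uring_enter is available) would resubmit the remainder:

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

static int submit_and_wait(int ring_fd, unsigned to_submit, unsigned wait_nr)
{
	while (to_submit) {
		int ret = syscall(__NR_io_uring_enter, ring_fd, to_submit,
				  wait_nr, IORING_ENTER_GETEVENTS, NULL, 0);

		if (ret < 0)
			return -1;	/* errno is set by syscall() */
		if ((unsigned)ret == to_submit)
			return 0;	/* fully submitted; events were reaped */
		if (ret == 0)
			break;		/* nothing accepted; avoid spinning */
		to_submit -= ret;	/* partial: no wait happened, retry */
	}
	return 0;
}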
@@ -3773,12 +3819,18 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
ctx->cq_entries = rings->cq_ring_entries;
size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
- if (size == SIZE_MAX)
+ if (size == SIZE_MAX) {
+ io_mem_free(ctx->rings);
+ ctx->rings = NULL;
return -EOVERFLOW;
+ }
ctx->sq_sqes = io_mem_alloc(size);
- if (!ctx->sq_sqes)
+ if (!ctx->sq_sqes) {
+ io_mem_free(ctx->rings);
+ ctx->rings = NULL;
return -ENOMEM;
+ }
return 0;
}
@@ -3870,7 +3922,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
ctx->account_mem = account_mem;
ctx->user = user;
- ctx->creds = prepare_creds();
+ ctx->creds = get_current_cred();
if (!ctx->creds) {
ret = -ENOMEM;
goto err;
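For context on this last hunk (not part of the patch text): get_current_cred() only takes a reference on the task's existing, immutable credentials, which is why ctx->creds can become a const pointer, whereas prepare_creds() would allocate a modifiable copy. The reference is paired with a put_cred() when the ring context is freed; a minimal sketch of the lifetime:

	const struct cred *creds = get_current_cred();	/* reference, never modified */
	/* ... punted work runs under override_creds(creds) / revert_creds() ... */
	put_cred(creds);				/* dropped at teardown */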