[PATCH 5.4 01/65] io_uring: make sure async workqueue is canceled on exit

From: Greg Kroah-Hartman
Date: Tue Jul 07 2020 - 11:28:21 EST


From: Jens Axboe <axboe@xxxxxxxxx>

Track async work items that we queue, so we can safely cancel them
if the ring is closed or the process exits. Newer kernels handle
this automatically with io-wq, but the old workqueue-based setup needs
a bit of special help to get there.

There's no upstream variant of this, as that would require backporting
all the io-wq changes from 5.5 onward. Hence I made a one-off that
ensures we don't leak memory if we have async work items that
need active cancellation (like socket IO).

Reported-by: Agarwal, Anchal <anchalag@xxxxxxxxxx>
Tested-by: Agarwal, Anchal <anchalag@xxxxxxxxxx>
Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
fs/io_uring.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 63 insertions(+)
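
The scheme, in short: every request handed to the async workqueue is
linked into a per-ring list (ctx->task_list) under ctx->task_lock, and
teardown walks that list, flags each still-pending request with
REQ_F_CANCEL, and signals the worker so blocked work bails out with
-ECANCELED. Below is a minimal userspace sketch of that track-and-flag
idea, for illustration only: the names (work_ctx, work_item, ctx_*) are
made up for the example, and the part where the kernel records the
worker task and sends it SIGINT to interrupt work already blocked in
socket IO is omitted.

/* build: cc -o track-cancel track-cancel.c (file name is arbitrary) */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct work_item {
	struct work_item	*next;		/* analogue of req->task_list */
	bool			canceled;	/* analogue of REQ_F_CANCEL */
	const char		*name;
};

struct work_ctx {
	pthread_mutex_t		task_lock;	/* analogue of ctx->task_lock */
	struct work_item	*task_list;	/* analogue of ctx->task_list */
};

/* queue side: track the item before handing it to a worker */
static void ctx_queue_work(struct work_ctx *ctx, struct work_item *req)
{
	pthread_mutex_lock(&ctx->task_lock);
	req->next = ctx->task_list;
	ctx->task_list = req;
	pthread_mutex_unlock(&ctx->task_lock);
	/* ...hand req off to a worker thread here... */
}

/* worker side: bail out if teardown flagged the item first; the kernel
 * version also unlinks the request from the list once it's done with it */
static void ctx_run_work(struct work_ctx *ctx, struct work_item *req)
{
	bool canceled;

	pthread_mutex_lock(&ctx->task_lock);
	canceled = req->canceled;
	pthread_mutex_unlock(&ctx->task_lock);

	printf("%s: %s\n", req->name, canceled ? "-ECANCELED" : "doing the work");
}

/* teardown side: flag everything still pending, as io_cancel_async_work() does */
static void ctx_cancel_all(struct work_ctx *ctx)
{
	struct work_item *req;

	pthread_mutex_lock(&ctx->task_lock);
	for (req = ctx->task_list; req; req = req->next)
		req->canceled = true;
	ctx->task_list = NULL;
	pthread_mutex_unlock(&ctx->task_lock);
}

int main(void)
{
	struct work_ctx ctx = { .task_lock = PTHREAD_MUTEX_INITIALIZER };
	struct work_item a = { .name = "req A" }, b = { .name = "req B" };

	ctx_queue_work(&ctx, &a);
	ctx_queue_work(&ctx, &b);
	ctx_cancel_all(&ctx);		/* ring closed / process exiting */
	ctx_run_work(&ctx, &a);		/* prints: req A: -ECANCELED */
	ctx_run_work(&ctx, &b);		/* prints: req B: -ECANCELED */
	return 0;
}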

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 7fa3cd3fff4d2..e0200406765c3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -267,6 +267,9 @@ struct io_ring_ctx {
 #if defined(CONFIG_UNIX)
 	struct socket		*ring_sock;
 #endif
+
+	struct list_head	task_list;
+	spinlock_t		task_lock;
 };
 
 struct sqe_submit {
@@ -331,14 +334,18 @@ struct io_kiocb {
 #define REQ_F_ISREG		2048	/* regular file */
 #define REQ_F_MUST_PUNT		4096	/* must be punted even for NONBLOCK */
 #define REQ_F_TIMEOUT_NOSEQ	8192	/* no timeout sequence */
+#define REQ_F_CANCEL		16384	/* cancel request */
 	unsigned long		fsize;
 	u64			user_data;
 	u32			result;
 	u32			sequence;
+	struct task_struct	*task;
 
 	struct fs_struct	*fs;
 
 	struct work_struct	work;
+	struct task_struct	*work_task;
+	struct list_head	task_list;
 };
 
 #define IO_PLUG_THRESHOLD		2
@@ -425,6 +432,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->cancel_list);
 	INIT_LIST_HEAD(&ctx->defer_list);
 	INIT_LIST_HEAD(&ctx->timeout_list);
+	INIT_LIST_HEAD(&ctx->task_list);
+	spin_lock_init(&ctx->task_lock);
 	return ctx;
 }
 
@@ -492,6 +501,7 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
 static inline void io_queue_async_work(struct io_ring_ctx *ctx,
 				       struct io_kiocb *req)
 {
+	unsigned long flags;
 	int rw = 0;
 
 	if (req->submit.sqe) {
@@ -503,6 +513,13 @@ static inline void io_queue_async_work(struct io_ring_ctx *ctx,
 		}
 	}
 
+	req->task = current;
+
+	spin_lock_irqsave(&ctx->task_lock, flags);
+	list_add(&req->task_list, &ctx->task_list);
+	req->work_task = NULL;
+	spin_unlock_irqrestore(&ctx->task_lock, flags);
+
 	queue_work(ctx->sqo_wq[rw], &req->work);
 }
 
@@ -2201,6 +2218,8 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 
 	old_cred = override_creds(ctx->creds);
 	async_list = io_async_list_from_sqe(ctx, req->submit.sqe);
+
+	allow_kernel_signal(SIGINT);
 restart:
 	do {
 		struct sqe_submit *s = &req->submit;
@@ -2232,6 +2251,12 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 		}
 
 		if (!ret) {
+			req->work_task = current;
+			if (req->flags & REQ_F_CANCEL) {
+				ret = -ECANCELED;
+				goto end_req;
+			}
+
 			s->has_user = cur_mm != NULL;
 			s->needs_lock = true;
 			do {
@@ -2246,6 +2271,12 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 					break;
 				cond_resched();
 			} while (1);
+end_req:
+			if (!list_empty(&req->task_list)) {
+				spin_lock_irq(&ctx->task_lock);
+				list_del_init(&req->task_list);
+				spin_unlock_irq(&ctx->task_lock);
+			}
 		}
 
 		/* drop submission reference */
@@ -2311,6 +2342,7 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 	}
 
 out:
+	disallow_signal(SIGINT);
 	if (cur_mm) {
 		set_fs(old_fs);
 		unuse_mm(cur_mm);
@@ -3675,12 +3707,32 @@ static int io_uring_fasync(int fd, struct file *file, int on)
 	return fasync_helper(fd, file, on, &ctx->cq_fasync);
 }
 
+static void io_cancel_async_work(struct io_ring_ctx *ctx,
+				 struct task_struct *task)
+{
+	if (list_empty(&ctx->task_list))
+		return;
+
+	spin_lock_irq(&ctx->task_lock);
+	while (!list_empty(&ctx->task_list)) {
+		struct io_kiocb *req;
+
+		req = list_first_entry(&ctx->task_list, struct io_kiocb, task_list);
+		list_del_init(&req->task_list);
+		req->flags |= REQ_F_CANCEL;
+		if (req->work_task && (!task || req->task == task))
+			send_sig(SIGINT, req->work_task, 1);
+	}
+	spin_unlock_irq(&ctx->task_lock);
+}
+
 static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 {
 	mutex_lock(&ctx->uring_lock);
 	percpu_ref_kill(&ctx->refs);
 	mutex_unlock(&ctx->uring_lock);
 
+	io_cancel_async_work(ctx, NULL);
 	io_kill_timeouts(ctx);
 	io_poll_remove_all(ctx);
 	io_iopoll_reap_events(ctx);
@@ -3688,6 +3740,16 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 	io_ring_ctx_free(ctx);
}
 
+static int io_uring_flush(struct file *file, void *data)
+{
+	struct io_ring_ctx *ctx = file->private_data;
+
+	if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
+		io_cancel_async_work(ctx, current);
+
+	return 0;
+}
+
 static int io_uring_release(struct inode *inode, struct file *file)
 {
 	struct io_ring_ctx *ctx = file->private_data;
@@ -3792,6 +3854,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 
 static const struct file_operations io_uring_fops = {
 	.release	= io_uring_release,
+	.flush		= io_uring_flush,
 	.mmap		= io_uring_mmap,
 	.poll		= io_uring_poll,
 	.fasync		= io_uring_fasync,
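
For reference, the "async work items that need active cancellation
(like socket IO)" scenario can be set up with something like the
liburing sketch below. This is an illustration of the trigger, not part
of the patch: a recvmsg on an idle socket can't complete inline, so it
is punted to the async workqueue and blocks there; if the task then
exits with the request still pending, a pre-io-wq kernel without this
patch had no way to cancel the work item.

/* build: cc -o uring-cancel uring-cancel.c -luring (name is arbitrary) */
#include <liburing.h>
#include <string.h>
#include <sys/socket.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct msghdr msg;
	struct iovec iov;
	char buf[64];
	int sv[2];

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv))
		return 1;
	if (io_uring_queue_init(4, &ring, 0))
		return 1;

	memset(&msg, 0, sizeof(msg));
	iov.iov_base = buf;
	iov.iov_len = sizeof(buf);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	/* nothing is ever written to sv[1], so this recvmsg can't complete
	 * inline; it gets punted to the async workqueue and blocks there */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_recvmsg(sqe, sv[0], &msg, 0);
	io_uring_submit(&ring);

	/* exit with the async work still pending; the ->flush/->release
	 * paths above must now cancel it instead of leaking it */
	return 0;
}
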
--
2.25.1