[PATCH 2/2] io_uring: use signal based task_work running

From: Jens Axboe
Date: Tue Jun 30 2020 - 14:45:35 EST


Since 5.7, we've been using task_work to trigger async running of
requests in the context of the original task. This generally works
great, but there's a case where if the task is currently blocked
in the kernel waiting on a condition to become true, it won't process
task_work. Even though the task is woken, it just checks whatever
condition it's waiting on, and goes back to sleep if it's still false.

This is a problem if that very condition only becomes true when that
task_work is run. An example of that is the task registering an eventfd
with io_uring, and it's now blocked waiting on an eventfd read. That
read could depend on a completion event, and that completion event
won't get trigged until task_work has been run.

Use the TWA_SIGNAL notification for task_work, so that we ensure that
the task always runs the work when queued.

Cc: stable@xxxxxxxxxxxxxxx # v5.7
Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
fs/io_uring.c | 30 ++++++++++++++++++++++++------
1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index e507737f044e..476f03b42777 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4072,6 +4072,23 @@ struct io_poll_table {
int error;
};

+static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb,
+ int notify)
+{
+ const bool is_sqthread = (req->ctx->flags & IORING_SETUP_SQPOLL) != 0;
+ struct task_struct *tsk = req->task;
+ int ret;
+
+ if (is_sqthread)
+ notify = 0;
+
+ ret = task_work_add(tsk, cb, notify);
+
+ if (!ret && is_sqthread)
+ wake_up_process(tsk);
+ return ret;
+}
+
static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
__poll_t mask, task_work_func_t func)
{
@@ -4095,13 +4112,13 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
* of executing it. We can't safely execute it anyway, as we may not
* have the needed state needed for it anyway.
*/
- ret = task_work_add(tsk, &req->task_work, true);
+ ret = io_req_task_work_add(req, &req->task_work, TWA_SIGNAL);
if (unlikely(ret)) {
WRITE_ONCE(poll->canceled, true);
tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, true);
+ task_work_add(tsk, &req->task_work, 0);
+ wake_up_process(tsk);
}
- wake_up_process(tsk);
return 1;
}

@@ -6182,15 +6199,16 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
do {
prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
TASK_INTERRUPTIBLE);
+ /* make sure we run task_work before checking for signals */
if (current->task_works)
task_work_run();
- if (io_should_wake(&iowq, false))
- break;
- schedule();
if (signal_pending(current)) {
ret = -EINTR;
break;
}
+ if (io_should_wake(&iowq, false))
+ break;
+ schedule();
} while (1);
finish_wait(&ctx->wait, &iowq.wq);

--
2.27.0