[PATCH] sched/wait: Introduce new, more compact wait_event*() primitives

From: Ingo Molnar
Date: Sun Mar 05 2017 - 08:28:09 EST


Turn the wait_event() interface into a state machine.

Only very lightly tested, but should demonstrate the principle.

Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: linux-kernel@xxxxxxxxxxxxxxx
NOT-Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
include/linux/wait.h | 29 +++++++++++++++++++++++++-
kernel/sched/wait.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/include/linux/wait.h b/include/linux/wait.h
index ead731ef5632..285f282c928e 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -225,6 +225,31 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr);

extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags);

+struct wait_event_state { /* per-call state handed to wait_event_loop() */
+ bool queued; /* wq_entry and the fields below have been set up */
+ bool prepared; /* prepare_to_wait_event() has been called at least once */
+ bool done; /* tells the wait_event_v2() loop to stop iterating */
+
+ long ret; /* result of the latest prepare_to_wait_event() call */
+ struct wait_queue_entry wq_entry; /* entry given to prepare_to_wait_event()/finish_wait() */
+};
+
+extern long wait_event_loop(struct wait_queue_head *wq_head, struct wait_event_state *wes, int condition);
+
+#define wait_event_v2(wq_head, condition)				\
+({									\
+	struct wait_event_state __wes;					\
+	long __ret;							\
+									\
+	__wes.queued = false;	/* state machine not set up yet */	\
+	for (;;) {		/* one state machine step per pass */	\
+		__ret = wait_event_loop(&(wq_head), &__wes, !!(condition)); \
+		if (__wes.done)						\
+			break;						\
+	}								\
+									\
+	__ret;			/* value of the last step */		\
+})
+
/*
* The below macro ___wait_event() has an explicit shadow of the __ret
* variable when used from the wait_event_*() macros.
@@ -277,7 +302,7 @@ __out: __ret; \
* wake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*/
-#define wait_event(wq_head, condition) \
+#define wait_event_v1(wq_head, condition) \
do { \
might_sleep(); \
if (condition) \
@@ -285,6 +310,8 @@ do { \
__wait_event(wq_head, condition); \
} while (0)

+#define wait_event wait_event_v2 /* route wait_event() users to the state machine variant */
+
#define __io_wait_event(wq_head, condition) \
(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
io_schedule())
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 48794482d9ac..4542d9f6a5a4 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -293,6 +293,64 @@ static inline bool is_kthread_should_stop(void)
}

/*
+ * One iteration of the wait_event*() state machine. It does not loop: the
+ * caller re-evaluates @condition and calls again until @wes->done is set,
+ * having cleared @wes->queued before the first call. Sleeps on @wq_head in
+ * TASK_UNINTERRUPTIBLE while @condition is false. Returns 0, or the error
+ * from prepare_to_wait_event() should the sleep state become interruptible.
+ */
+long wait_event_loop(struct wait_queue_head *wq_head, struct wait_event_state *wes, int condition)
+{
+	if (!wes->queued) {
+		might_sleep();
+
+		/* Condition already met on the first call, nothing to wait for: */
+		if (condition) {
+			wes->done = true;
+			return 0;
+		}
+
+		/* Set up the wait-queue entry and the iteration state: */
+		init_wait_entry(&wes->wq_entry, 0);
+
+		wes->done = false;
+		wes->queued = true;
+		wes->prepared = false;
+		wes->ret = 0;
+	} else if (condition) {
+		/* Here is where we notice an updated wait condition: */
+		finish_wait(wq_head, &wes->wq_entry);
+		wes->done = true;
+		return 0;
+	}
+
+	if (wes->prepared) {
+		/*
+		 * Only relevant for interruptible sleep states: report
+		 * the error returned by the previous prepare step.
+		 */
+		if (___wait_is_interruptible(TASK_UNINTERRUPTIBLE) && wes->ret) {
+			/* We already got dequeued, so mark it done: */
+			wes->done = true;
+			return wes->ret;
+		}
+
+		/* The caller found the condition still false, so sleep: */
+		schedule();
+	}
+
+	/*
+	 * Queue up in TASK_UNINTERRUPTIBLE, as __wait_event() does. A state
+	 * of 0 is TASK_RUNNING, in which schedule() would not block and the
+	 * caller would spin.
+	 */
+	wes->ret = prepare_to_wait_event(wq_head, &wes->wq_entry, TASK_UNINTERRUPTIBLE);
+	wes->prepared = true;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(wait_event_loop);
+
+/*
* DEFINE_WAIT_FUNC(wait, woken_wake_func);
*
* add_wait_queue(&wq_head, &wait);