[PATCH] blk-wbt: get back the missed wakeup from __wbt_done

From: Jianchao Wang
Date: Thu Aug 23 2018 - 09:08:09 EST


Commit 2887e41 ("blk-wbt: Avoid lock contention and thundering herd
issue in wbt_wait") introduces two cases where a wakeup can be missed:

- __wbt_done wakes up only one waiter at a time. At that moment there
may be multiple waiters and (limit - inflight) > 1, so a single
wake_up() lets just one of them proceed even though there is budget
for more. For example, if a completion leaves (limit - inflight) at 8,
up to eight waiters could be admitted, yet only the first one is woken.

- When a waiter is woken, it is still on the wait queue and sets itself
back to TASK_UNINTERRUPTIBLE immediately, so it can absorb another
wakeup. If a second __wbt_done comes in and wakes it again, that
wakeup is wasted instead of reaching the next waiter. (Both windows
are marked in the annotated excerpt of the old wait loop below.)
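
For reference, this is the wait loop removed by the patch below,
annotated to show where the two windows open (the annotations are
added here; the code is otherwise unchanged from the removed lines):

	add_wait_queue_exclusive(&rqw->wait, &wait);
	do {
		/*
		 * Case 2 window: we go back to TASK_UNINTERRUPTIBLE
		 * while still on the wait queue, so a second wakeup
		 * aimed at us is absorbed here instead of reaching
		 * another waiter.
		 */
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw)))
			break;

		if (lock) {
			spin_unlock_irq(lock);
			io_schedule();
			spin_lock_irq(lock);
		} else
			io_schedule();

		/*
		 * Case 1: a single wake_up() from __wbt_done gets us
		 * here, but any other waiters stay asleep even when
		 * (limit - inflight) would admit them as well.
		 */
		has_sleeper = false;
	} while (1);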

To fix both cases without reintroducing excessive lock contention,
introduce a custom wake function, wbt_wake_function, used by
__wbt_wait, and switch __wbt_done to wake_up_all. wbt_wake_function
first tries to get wbt budget for the waiter; on success it wakes the
process and removes it from the wait queue, otherwise it returns -1
to stop the wake-up loop.
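
The return value convention used here is the one honoured by the
waiter walk in __wake_up_common() (kernel/sched/wait.c). Roughly
paraphrased below (simplified, not the verbatim kernel code): a
negative return from the per-entry wake function aborts the walk,
and wake_up_all() passes nr_exclusive = 0, so the walk otherwise
visits every waiter on the queue:

	list_for_each_entry_safe(curr, next, &wq_head->head, entry) {
		unsigned flags = curr->flags;
		int ret = curr->func(curr, mode, wake_flags, key);

		if (ret < 0)
			break;	/* wbt_wake_function: out of budget, stop */
		if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
			break;	/* wake_up_all() passes 0: never trips here */
	}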

Signed-off-by: Jianchao Wang <jianchao.w.wang@xxxxxxxxxx>
Fixes: 2887e41 ("blk-wbt: Avoid lock contention and thundering herd issue in wbt_wait")
Cc: Anchal Agarwal <anchalag@xxxxxxxxxx>
Cc: Frank van der Linden <fllinden@xxxxxxxxxx>
---
block/blk-wbt.c | 78 +++++++++++++++++++++++++++++++++++++++------------------
1 file changed, 54 insertions(+), 24 deletions(-)

diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index c9358f1..2667590 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -166,7 +166,7 @@ static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
int diff = limit - inflight;

if (!inflight || diff >= rwb->wb_background / 2)
- wake_up(&rqw->wait);
+ wake_up_all(&rqw->wait);
}
}

@@ -481,6 +481,40 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
return limit;
}

+struct wbt_wait_data {
+ struct task_struct *curr;
+ struct rq_wb *rwb;
+ struct rq_wait *rqw;
+ unsigned long rw;
+};
+
+static int wbt_wake_function(wait_queue_entry_t *curr, unsigned int mode,
+ int wake_flags, void *key)
+{
+ struct wbt_wait_data *data = curr->private;
+
+ /*
+ * If we fail to get the wbt budget, return -1 to interrupt the
+ * wake-up loop in __wake_up_common.
+ */
+ if (!rq_wait_inc_below(data->rqw, get_limit(data->rwb, data->rw)))
+ return -1;
+
+ wake_up_process(data->curr);
+
+ list_del_init(&curr->entry);
+ return 1;
+}
+
+static inline void wbt_init_wait(struct wait_queue_entry *wait,
+ struct wbt_wait_data *data)
+{
+ INIT_LIST_HEAD(&wait->entry);
+ wait->flags = 0;
+ wait->func = wbt_wake_function;
+ wait->private = data;
+}
+
/*
* Block if we will exceed our limit, or if we are currently waiting for
* the timer to kick off queuing again.
@@ -491,31 +525,27 @@ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
__acquires(lock)
{
struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
- DECLARE_WAITQUEUE(wait, current);
- bool has_sleeper;
-
- has_sleeper = wq_has_sleeper(&rqw->wait);
- if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw)))
+ struct wait_queue_entry wait;
+ struct wbt_wait_data data = {
+ .curr = current,
+ .rwb = rwb,
+ .rqw = rqw,
+ .rw = rw,
+ };
+
+ if (!wq_has_sleeper(&rqw->wait) &&
+ rq_wait_inc_below(rqw, get_limit(rwb, rw)))
return;

- add_wait_queue_exclusive(&rqw->wait, &wait);
- do {
- set_current_state(TASK_UNINTERRUPTIBLE);
-
- if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw)))
- break;
-
- if (lock) {
- spin_unlock_irq(lock);
- io_schedule();
- spin_lock_irq(lock);
- } else
- io_schedule();
- has_sleeper = false;
- } while (1);
-
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&rqw->wait, &wait);
+ wbt_init_wait(&wait, &data);
+ prepare_to_wait_exclusive(&rqw->wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (lock) {
+ spin_unlock_irq(lock);
+ io_schedule();
+ spin_lock_irq(lock);
+ } else
+ io_schedule();
}

static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
--
2.7.4