Re: [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE

From: Peter Zijlstra
Date: Tue Feb 03 2015 - 06:55:43 EST


On Tue, Feb 03, 2015 at 12:33:48PM +0100, Peter Zijlstra wrote:
> > block/bsg.c- prepare_to_wait(&bd->wq_done, &wait, TASK_UNINTERRUPTIBLE);
> > block/bsg.c- spin_unlock_irq(&bd->lock);
> > block/bsg.c: io_schedule();
> > block/bsg.c- finish_wait(&bd->wq_done, &wait);
> >
> > Which is doubly buggy because:
> > 1) it doesn't loop
> > 2) it sets TASK_UNINTERRUPTIBLE _after_ testing for the sleep event.
>
> OK, actually had a look at this one; it might be ok.
>
> The spinlock might fully serialize the state so that nothing fails, and the
> entire function is called in a loop. Still seriously obtuse code.

Jens, would something like the below work for you?

---
block/bsg.c | 72 ++++++++++++++++++----------------------------------
include/linux/wait.h | 15 +++++++++++
2 files changed, 40 insertions(+), 47 deletions(-)

diff --git a/block/bsg.c b/block/bsg.c
index 276e869e686c..d214e929ce18 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -136,42 +136,6 @@ static inline struct hlist_head *bsg_dev_idx_hash(int index)
return &bsg_device_list[index & (BSG_LIST_ARRAY_SIZE - 1)];
}

-static int bsg_io_schedule(struct bsg_device *bd)
-{
- DEFINE_WAIT(wait);
- int ret = 0;
-
- spin_lock_irq(&bd->lock);
-
- BUG_ON(bd->done_cmds > bd->queued_cmds);
-
- /*
- * -ENOSPC or -ENODATA? I'm going for -ENODATA, meaning "I have no
- * work to do", even though we return -ENOSPC after this same test
- * during bsg_write() -- there, it means our buffer can't have more
- * bsg_commands added to it, thus has no space left.
- */
- if (bd->done_cmds == bd->queued_cmds) {
- ret = -ENODATA;
- goto unlock;
- }
-
- if (!test_bit(BSG_F_BLOCK, &bd->flags)) {
- ret = -EAGAIN;
- goto unlock;
- }
-
- prepare_to_wait(&bd->wq_done, &wait, TASK_UNINTERRUPTIBLE);
- spin_unlock_irq(&bd->lock);
- io_schedule();
- finish_wait(&bd->wq_done, &wait);
-
- return ret;
-unlock:
- spin_unlock_irq(&bd->lock);
- return ret;
-}
-
static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
struct sg_io_v4 *hdr, struct bsg_device *bd,
fmode_t has_write_perm)
@@ -482,6 +446,30 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
return ret;
}

+static bool bsg_complete(struct bsg_device *bd)
+{
+ bool ret = false;
+ bool spin;
+
+ do {
+ spin_lock_irq(&bd->lock);
+
+ BUG_ON(bd->done_cmds > bd->queued_cmds);
+
+ /*
+ * All commands consumed.
+ */
+ if (bd->done_cmds == bd->queued_cmds)
+ ret = true;
+
+ spin = !test_bit(BSG_F_BLOCK, &bd->flags);
+
+ spin_unlock_irq(&bd->lock);
+ } while (!ret && spin);
+
+ return ret;
+}
+
static int bsg_complete_all_commands(struct bsg_device *bd)
{
struct bsg_command *bc;
@@ -492,17 +480,7 @@ static int bsg_complete_all_commands(struct bsg_device *bd)
/*
* wait for all commands to complete
*/
- ret = 0;
- do {
- ret = bsg_io_schedule(bd);
- /*
- * look for -ENODATA specifically -- we'll sometimes get
- * -ERESTARTSYS when we've taken a signal, but we can't
- * return until we're done freeing the queue, so ignore
- * it. The signal will get handled when we're done freeing
- * the bsg_device.
- */
- } while (ret != -ENODATA);
+ io_wait_event(bd->wq_done, bsg_complete(bd));

/*
* discard done commands
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 2232ed16635a..71fc1d31e48d 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -267,6 +267,21 @@ do { \
__wait_event(wq, condition); \
} while (0)

+#define __io_wait_event(wq, condition) \
+ (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
+ io_schedule())
+
+/*
+ * io_wait_event() -- like wait_event() but with io_schedule()
+ */
+#define io_wait_event(wq, condition) \
+do { \
+ might_sleep(); \
+ if (condition) \
+ break; \
+ __io_wait_event(wq, condition); \
+} while (0)
+
#define __wait_event_freezable(wq, condition) \
___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \
schedule(); try_to_freeze())
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/