When isolcpus=io_queue is enabled and the last housekeeping CPU serving a
given hctx goes offline, there is no CPU left to handle the IOs for that
hctx. To prevent IO stalls, prevent offlining housekeeping CPUs which
are still serving isolated CPUs.
Otherwise:
Signed-off-by: Daniel Wagner <wagi@xxxxxxxxxx>
---
block/blk-mq.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 44 insertions(+), 2 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c2697db591091200cdb9f6e082e472b829701e4c..aff17673b773583dfb2b01cb2f5f010c456bd834 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3627,6 +3627,48 @@ static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
return data.has_rq;
}
+static bool blk_mq_hctx_check_isolcpus_online(struct blk_mq_hw_ctx *hctx, unsigned int cpu)
+{
+ const struct cpumask *hk_mask;
+ int i;
+
+ if (!housekeeping_enabled(HK_TYPE_IO_QUEUE))
+ return true;
+
+ hk_mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE);
+
+ for (i = 0; i < hctx->nr_ctx; i++) {
+ struct blk_mq_ctx *ctx = hctx->ctxs[i];
+
+ if (ctx->cpu == cpu)
+ continue;
+
+ /*
+ * Another CPU mapped to this hctx is an online housekeeping
+ * CPU, so the hardware context stays usable; stop scanning
+ * (the function then returns false).
+ */
+ if (cpumask_test_cpu(ctx->cpu, hk_mask) &&
+ cpu_online(ctx->cpu))
+ break;
+
+ /*
+ * This CPU is not an online housekeeping CPU. Skip it if
+ * it is offline; otherwise it is online and not in the
+ * housekeeping mask, i.e. an isolated CPU still mapped here.
+ */
+ if (cpu_is_offline(ctx->cpu))
+ continue;
+
+ pr_warn("%s: trying to offline hctx%d but there is still an online isolcpu CPU %d mapped to it\n",
+ hctx->queue->disk->disk_name,
+ hctx->queue_num, ctx->cpu);
+ return true;
+ }
+
+ return false;
+}
+
static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx,
unsigned int this_cpu)
{
@@ -3647,7 +3689,7 @@ static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx,
/* this hctx has at least one online CPU */
if (this_cpu != cpu)
- return true;
+ return blk_mq_hctx_check_isolcpus_online(hctx, this_cpu);
}
return false;
@@ -3659,7 +3701,7 @@ static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
struct blk_mq_hw_ctx, cpuhp_online);
if (blk_mq_hctx_has_online_cpu(hctx, cpu))
- return 0;
+ return -EINVAL;
/*
* Prevent new request from being allocated on the current hctx.