[PATCH v6 9/9] blk-mq: prevent offlining hk CPU with associated online isolated CPUs

From: Daniel Wagner
Date: Thu Apr 24 2025 - 14:22:34 EST


When isolcpus=io_queue is enabled, and the last housekeeping CPU for a
given hctx would go offline, there would be no CPU left which handles
the IOs. To prevent IO stalls, prevent offlining housekeeping CPUs which
are still serving isolated CPUs.

Signed-off-by: Daniel Wagner <wagi@xxxxxxxxxx>
---
block/blk-mq.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 44 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index c2697db591091200cdb9f6e082e472b829701e4c..aff17673b773583dfb2b01cb2f5f010c456bd834 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3627,6 +3627,48 @@ static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
return data.has_rq;
}

+static bool blk_mq_hctx_check_isolcpus_online(struct blk_mq_hw_ctx *hctx, unsigned int cpu)
+{
+ const struct cpumask *hk_mask;
+ int i;
+
+ if (!housekeeping_enabled(HK_TYPE_IO_QUEUE))
+ return true;
+
+ hk_mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE);
+
+ for (i = 0; i < hctx->nr_ctx; i++) {
+ struct blk_mq_ctx *ctx = hctx->ctxs[i];
+
+ if (ctx->cpu == cpu)
+ continue;
+
+ /*
+ * Another online housekeeping CPU is mapped to this hctx, so
+ * the hardware context stays usable: stop scanning, the
+ * function returns false after the loop.
+ */
+ if (cpumask_test_cpu(ctx->cpu, hk_mask) &&
+ cpu_online(ctx->cpu))
+ break;
+
+ /*
+ * ctx->cpu is not an online housekeeping CPU. If it is online
+ * it is an isolated CPU still mapped to this hctx: warn and
+ * return true. Later ctxs are not examined in that case.
+ */
+ if (cpu_is_offline(ctx->cpu))
+ continue;
+
+ pr_warn("%s: trying to offline hctx%d but there is still an online isolcpu CPU %d mapped to it\n",
+ hctx->queue->disk->disk_name,
+ hctx->queue_num, ctx->cpu);
+ return true;
+ }
+
+ return false;
+}
+
static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx,
unsigned int this_cpu)
{
@@ -3647,7 +3689,7 @@ static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx,

/* this hctx has at least one online CPU */
if (this_cpu != cpu)
- return true;
+ return blk_mq_hctx_check_isolcpus_online(hctx, this_cpu);
}

return false;
@@ -3659,7 +3701,7 @@ static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
struct blk_mq_hw_ctx, cpuhp_online);

if (blk_mq_hctx_has_online_cpu(hctx, cpu))
- return 0;
+ return -EINVAL;

/*
* Prevent new request from being allocated on the current hctx.

--
2.49.0