[PATCH 2/4] blkcg: Restructure blkg_conf_prep() and friends

From: Tejun Heo
Date: Tue Jan 10 2023 - 17:27:47 EST


We want to support lazy init of rq-qos policies so that iolatency is enabled
lazily on configuration instead of gendisk initialization. The way blkg
config helpers are structured now is a bit awkward for that. Let's
restructure:

* blkcg_conf_open_bdev() is renamed to blkg_conf_open_bdev(). The blkcg_
prefix was used because the bdev opening step is blkg-independent.
However, the distinction is too subtle and confuses more than helps. Let's
switch to blkg prefix so that it's consistent with the type and other
helper names.

* struct blkg_conf_ctx now remembers the original input string and is always
initialized by the new blkg_conf_init().

* blkg_conf_open_bdev() is updated to take a pointer to blkg_conf_ctx like
blkg_conf_prep() and can be called multiple times safely. Instead of
modifying the double pointer to input string directly,
blkg_conf_open_bdev() now sets blkg_conf_ctx->body.

* blkg_conf_finish() is renamed to blkg_conf_exit() for symmetry and now
must be called on all blkg_conf_ctx's which were initialized with
blkg_conf_init().

Combined, this allows the users to either open the bdev first or do it
altogether with blkg_conf_prep() which will help implementing lazy init of
rq-qos policies.

blkg_conf_init/exit() will also be used implement synchronization against
device removal. This is necessary because iolat / iocost are configured
through cgroupfs instead of one of the files under /sys/block/DEVICE. As
cgroupfs operations aren't synchronized with block layer, the lazy init and
other configuration operations may race against device removal. This patch
makes blkg_conf_init/exit() used consistently for all cgroup-orginating
configurations making them a good place to implement explicit
synchronization.

Users are updated accordingly. No behavior change is intended by this patch.

v2: bfq wasn't updated in v1 causing a build error. Fixed.

v3: Update the description to include future use of blkg_conf_init/exit() as
synchronization points.

Signed-off-by: Tejun Heo <tj@xxxxxxxxxx>
Cc: Josef Bacik <josef@xxxxxxxxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Yu Kuai <yukuai1@xxxxxxxxxxxxxxx>
---
block/bfq-cgroup.c | 8 ++--
block/blk-cgroup.c | 105 +++++++++++++++++++++++++++---------------
block/blk-cgroup.h | 10 ++--
block/blk-iocost.c | 58 +++++++++++++----------
block/blk-iolatency.c | 8 ++--
block/blk-throttle.c | 16 ++++---
6 files changed, 127 insertions(+), 78 deletions(-)

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index a6e8da5f5cfd..97925793aee4 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -1115,9 +1115,11 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
struct bfq_group *bfqg;
u64 v;

- ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, buf, &ctx);
+ blkg_conf_init(&ctx, buf);
+
+ ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, &ctx);
if (ret)
- return ret;
+ goto out;

if (sscanf(ctx.body, "%llu", &v) == 1) {
/* require "default" on dfl */
@@ -1139,7 +1141,7 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
ret = 0;
}
out:
- blkg_conf_finish(&ctx);
+ blkg_conf_exit(&ctx);
return ret ?: nbytes;
}

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 7d338bc84387..18fbdea1f756 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -626,69 +626,93 @@ u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);

/**
- * blkcg_conf_open_bdev - parse and open bdev for per-blkg config update
- * @inputp: input string pointer
+ * blkg_conf_init - initialize a blkg_conf_ctx
+ * @ctx: blkg_conf_ctx to initialize
+ * @input: input string
*
- * Parse the device node prefix part, MAJ:MIN, of per-blkg config update
- * from @input and get and return the matching bdev. *@inputp is
- * updated to point past the device node prefix. Returns an ERR_PTR()
- * value on error.
+ * Initialize @ctx which can be used to parse blkg config input string @input.
+ * Once initialized, @ctx can be used with blkg_conf_open_bdev() and
+ * blkg_conf_prep(), and must be cleaned up with blkg_conf_exit().
+ */
+void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input)
+{
+ *ctx = (struct blkg_conf_ctx){ .input = input };
+}
+EXPORT_SYMBOL_GPL(blkg_conf_init);
+
+/**
+ * blkg_conf_open_bdev - parse and open bdev for per-blkg config update
+ * @ctx: blkg_conf_ctx initialized with blkg_conf_init()
*
- * Use this function iff blkg_conf_prep() can't be used for some reason.
+ * Parse the device node prefix part, MAJ:MIN, of per-blkg config update from
+ * @ctx->input and get and store the matching bdev in @ctx->bdev. @ctx->body is
+ * set to point past the device node prefix.
+ *
+ * This function may be called multiple times on @ctx and the extra calls become
+ * NOOPs. blkg_conf_prep() implicitly calls this function. Use this function
+ * explicitly if bdev access is needed without resolving the blkcg / policy part
+ * of @ctx->input. Returns -errno on error.
*/
-struct block_device *blkcg_conf_open_bdev(char **inputp)
+int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
{
- char *input = *inputp;
+ char *input = ctx->input;
unsigned int major, minor;
struct block_device *bdev;
int key_len;

+ if (ctx->bdev)
+ return 0;
+
if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;

input += key_len;
if (!isspace(*input))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
input = skip_spaces(input);

bdev = blkdev_get_no_open(MKDEV(major, minor));
if (!bdev)
- return ERR_PTR(-ENODEV);
+ return -ENODEV;
if (bdev_is_partition(bdev)) {
blkdev_put_no_open(bdev);
- return ERR_PTR(-ENODEV);
+ return -ENODEV;
}

- *inputp = input;
- return bdev;
+ ctx->body = input;
+ ctx->bdev = bdev;
+ return 0;
}

/**
* blkg_conf_prep - parse and prepare for per-blkg config update
* @blkcg: target block cgroup
* @pol: target policy
- * @input: input string
- * @ctx: blkg_conf_ctx to be filled
+ * @ctx: blkg_conf_ctx initialized with blkg_conf_init()
+ *
+ * Parse per-blkg config update from @ctx->input and initialize @ctx
+ * accordingly. On success, @ctx->body points to the part of @ctx->input
+ * following MAJ:MIN, @ctx->bdev points to the target block device and
+ * @ctx->blkg to the blkg being configured.
*
- * Parse per-blkg config update from @input and initialize @ctx with the
- * result. @ctx->blkg points to the blkg to be updated and @ctx->body the
- * part of @input following MAJ:MIN. This function returns with queue lock
- * held and must be paired with blkg_conf_finish().
+ * blkg_conf_open_bdev() may be called on @ctx beforehand. On success, this
+ * function returns with queue lock held and must be followed by
+ * blkg_conf_exit().
*/
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
- char *input, struct blkg_conf_ctx *ctx)
+ struct blkg_conf_ctx *ctx)
__acquires(&bdev->bd_queue->queue_lock)
{
- struct block_device *bdev;
struct gendisk *disk;
struct request_queue *q;
struct blkcg_gq *blkg;
int ret;

- bdev = blkcg_conf_open_bdev(&input);
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
- disk = bdev->bd_disk;
+ ret = blkg_conf_open_bdev(ctx);
+ if (ret)
+ return ret;
+
+ disk = ctx->bdev->bd_disk;
q = disk->queue;

/*
@@ -766,9 +790,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
}
success:
blk_queue_exit(q);
- ctx->bdev = bdev;
ctx->blkg = blkg;
- ctx->body = input;
return 0;

fail_preloaded:
@@ -778,7 +800,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
fail_exit_queue:
blk_queue_exit(q);
fail:
- blkdev_put_no_open(bdev);
/*
* If queue was bypassing, we should retry. Do so after a
* short msleep(). It isn't strictly necessary but queue
@@ -794,19 +815,27 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
EXPORT_SYMBOL_GPL(blkg_conf_prep);

/**
- * blkg_conf_finish - finish up per-blkg config update
- * @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
+ * blkg_conf_exit - clean up per-blkg config update
+ * @ctx: blkg_conf_ctx initialized with blkg_conf_init()
*
- * Finish up after per-blkg config update. This function must be paired
- * with blkg_conf_prep().
+ * Clean up after per-blkg config update. This function must be called on all
+ * blkg_conf_ctx's initialized with blkg_conf_init().
*/
-void blkg_conf_finish(struct blkg_conf_ctx *ctx)
+void blkg_conf_exit(struct blkg_conf_ctx *ctx)
__releases(&ctx->bdev->bd_queue->queue_lock)
{
- spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
- blkdev_put_no_open(ctx->bdev);
+ if (ctx->blkg) {
+ spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
+ ctx->blkg = NULL;
+ }
+
+ if (ctx->bdev) {
+ blkdev_put_no_open(ctx->bdev);
+ ctx->body = NULL;
+ ctx->bdev = NULL;
+ }
}
-EXPORT_SYMBOL_GPL(blkg_conf_finish);
+EXPORT_SYMBOL_GPL(blkg_conf_exit);

static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
{
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 1e94e404eaa8..fe09e8b4c2a8 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -208,15 +208,17 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);

struct blkg_conf_ctx {
+ char *input;
+ char *body;
struct block_device *bdev;
struct blkcg_gq *blkg;
- char *body;
};

-struct block_device *blkcg_conf_open_bdev(char **inputp);
+void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input);
+int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx);
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
- char *input, struct blkg_conf_ctx *ctx);
-void blkg_conf_finish(struct blkg_conf_ctx *ctx);
+ struct blkg_conf_ctx *ctx);
+void blkg_conf_exit(struct blkg_conf_ctx *ctx);

/**
* bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 6955605629e4..22a3639a7a05 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3091,9 +3091,11 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
return nbytes;
}

- ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, buf, &ctx);
+ blkg_conf_init(&ctx, buf);
+
+ ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, &ctx);
if (ret)
- return ret;
+ goto err;

iocg = blkg_to_iocg(ctx.blkg);

@@ -3112,12 +3114,14 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
weight_updated(iocg, &now);
spin_unlock(&iocg->ioc->lock);

- blkg_conf_finish(&ctx);
+ blkg_conf_exit(&ctx);
return nbytes;

einval:
- blkg_conf_finish(&ctx);
- return -EINVAL;
+ ret = -EINVAL;
+err:
+ blkg_conf_exit(&ctx);
+ return ret;
}

static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
@@ -3172,19 +3176,22 @@ static const match_table_t qos_tokens = {
static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
size_t nbytes, loff_t off)
{
- struct block_device *bdev;
+ struct blkg_conf_ctx ctx;
struct gendisk *disk;
struct ioc *ioc;
u32 qos[NR_QOS_PARAMS];
bool enable, user;
- char *p;
+ char *body, *p;
int ret;

- bdev = blkcg_conf_open_bdev(&input);
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
+ blkg_conf_init(&ctx, input);

- disk = bdev->bd_disk;
+ ret = blkg_conf_open_bdev(&ctx);
+ if (ret)
+ goto err;
+
+ body = ctx.body;
+ disk = ctx.bdev->bd_disk;
ioc = q_to_ioc(disk->queue);
if (!ioc) {
ret = blk_iocost_init(disk);
@@ -3201,7 +3208,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
enable = ioc->enabled;
user = ioc->user_qos_params;

- while ((p = strsep(&input, " \t\n"))) {
+ while ((p = strsep(&body, " \t\n"))) {
substring_t args[MAX_OPT_ARGS];
char buf[32];
int tok;
@@ -3290,7 +3297,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
blk_mq_unquiesce_queue(disk->queue);
blk_mq_unfreeze_queue(disk->queue);

- blkdev_put_no_open(bdev);
+ blkg_conf_exit(&ctx);
return nbytes;
einval:
spin_unlock_irq(&ioc->lock);
@@ -3300,7 +3307,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,

ret = -EINVAL;
err:
- blkdev_put_no_open(bdev);
+ blkg_conf_exit(&ctx);
return ret;
}

@@ -3351,22 +3358,25 @@ static const match_table_t i_lcoef_tokens = {
static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
size_t nbytes, loff_t off)
{
- struct block_device *bdev;
+ struct blkg_conf_ctx ctx;
struct request_queue *q;
struct ioc *ioc;
u64 u[NR_I_LCOEFS];
bool user;
- char *p;
+ char *body, *p;
int ret;

- bdev = blkcg_conf_open_bdev(&input);
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
+ blkg_conf_init(&ctx, input);
+
+ ret = blkg_conf_open_bdev(&ctx);
+ if (ret)
+ goto err;

- q = bdev_get_queue(bdev);
+ body = ctx.body;
+ q = bdev_get_queue(ctx.bdev);
ioc = q_to_ioc(q);
if (!ioc) {
- ret = blk_iocost_init(bdev->bd_disk);
+ ret = blk_iocost_init(ctx.bdev->bd_disk);
if (ret)
goto err;
ioc = q_to_ioc(q);
@@ -3379,7 +3389,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
memcpy(u, ioc->params.i_lcoefs, sizeof(u));
user = ioc->user_cost_model;

- while ((p = strsep(&input, " \t\n"))) {
+ while ((p = strsep(&body, " \t\n"))) {
substring_t args[MAX_OPT_ARGS];
char buf[32];
int tok;
@@ -3426,7 +3436,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
blk_mq_unquiesce_queue(q);
blk_mq_unfreeze_queue(q);

- blkdev_put_no_open(bdev);
+ blkg_conf_exit(&ctx);
return nbytes;

einval:
@@ -3437,7 +3447,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,

ret = -EINVAL;
err:
- blkdev_put_no_open(bdev);
+ blkg_conf_exit(&ctx);
return ret;
}

diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index ecdc10741836..3b3667f397a9 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -842,9 +842,11 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
u64 oldval;
int ret;

- ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
+ blkg_conf_init(&ctx, buf);
+
+ ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, &ctx);
if (ret)
- return ret;
+ goto out;

iolat = blkg_to_lat(ctx.blkg);
p = ctx.body;
@@ -880,7 +882,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
iolatency_clear_scaling(blkg);
ret = 0;
out:
- blkg_conf_finish(&ctx);
+ blkg_conf_exit(&ctx);
return ret ?: nbytes;
}

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 6fb5a2f9e1ee..75841d1d9bf4 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1369,9 +1369,11 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
int ret;
u64 v;

- ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
+ blkg_conf_init(&ctx, buf);
+
+ ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, &ctx);
if (ret)
- return ret;
+ goto out_finish;

ret = -EINVAL;
if (sscanf(ctx.body, "%llu", &v) != 1)
@@ -1390,7 +1392,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
tg_conf_updated(tg, false);
ret = 0;
out_finish:
- blkg_conf_finish(&ctx);
+ blkg_conf_exit(&ctx);
return ret ?: nbytes;
}

@@ -1562,9 +1564,11 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
int ret;
int index = of_cft(of)->private;

- ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
+ blkg_conf_init(&ctx, buf);
+
+ ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, &ctx);
if (ret)
- return ret;
+ goto out_finish;

tg = blkg_to_tg(ctx.blkg);
tg_update_carryover(tg);
@@ -1663,7 +1667,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
tg->td->limit_valid[LIMIT_LOW]);
ret = 0;
out_finish:
- blkg_conf_finish(&ctx);
+ blkg_conf_exit(&ctx);
return ret ?: nbytes;
}

--
2.39.0