Re: [PATCH 1/2] block: add max_open_zones to blk-sysfs

From: Damien Le Moal
Date: Mon Jun 29 2020 - 22:17:34 EST


On 2020/06/30 10:49, Damien Le Moal wrote:
> On 2020/06/16 19:28, Niklas Cassel wrote:
>> Add a new max_open_zones definition in the sysfs documentation.
>> This definition will be common for all devices utilizing the zoned block
>> device support in the kernel.
>>
>> Export max open zones according to this new definition for NVMe Zoned
>> Namespace devices, ZAC ATA devices (which are treated as SCSI devices by
>> the kernel), and ZBC SCSI devices.
>>
>> Add the new max_open_zones struct member to the request_queue, rather
>
> Add the new max_open_zones member to struct request_queue...
>
>> than as a queue limit, since this property cannot be split across stacking
>> drivers.
>
> But device-mapper target device have a request queue too and it looks like your
> patch is not setting any value, using the default 0 for dm-linear and dm-flakey.
> Attaching the new attribute directly to the request queue rather than adding it
> as part of the queue limits seems odd. Even if DM case is left unsupported
> (using the default 0 = no limit), it may be cleaner to add the field as part of
> the limit struct.
>
> Adding the field as a device attribute rather than a queue limit, similarly to
> the device maximum queue depth would be another option. In such case, including
> the field directly as part of the request queue makes more sense.

Thinking more about this one, struct request_queue has nr_zones field, which is
not a queue limit but still exported as a queue attribute. Device mapper
exposing a zoned drive target do set this field manually. So I guess the same
approach is valid for max_open_zones (and max_active_zones). So OK, disregard
this comment.

The other comments I sent below remain though.

>
>>
>> Signed-off-by: Niklas Cassel <niklas.cassel@xxxxxxx>
>> ---
>> Documentation/block/queue-sysfs.rst | 7 +++++++
>> block/blk-sysfs.c | 15 +++++++++++++++
>> drivers/nvme/host/zns.c | 1 +
>> drivers/scsi/sd_zbc.c | 4 ++++
>> include/linux/blkdev.h | 20 ++++++++++++++++++++
>> 5 files changed, 47 insertions(+)
>>
>> diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst
>> index 6a8513af9201..f01cf8530ae4 100644
>> --- a/Documentation/block/queue-sysfs.rst
>> +++ b/Documentation/block/queue-sysfs.rst
>> @@ -117,6 +117,13 @@ Maximum number of elements in a DMA scatter/gather list with integrity
>> data that will be submitted by the block layer core to the associated
>> block driver.
>>
>> +max_open_zones (RO)
>> +-------------------
>> +For zoned block devices (zoned attribute indicating "host-managed" or
>> +"host-aware"), the sum of zones belonging to any of the zone states:
>> +EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value.
>> +If this value is 0, there is no limit.
>> +
>> max_sectors_kb (RW)
>> -------------------
>> This is the maximum number of kilobytes that the block layer will allow
>> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
>> index 02643e149d5e..fa42961e9678 100644
>> --- a/block/blk-sysfs.c
>> +++ b/block/blk-sysfs.c
>> @@ -305,6 +305,11 @@ static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
>> return queue_var_show(blk_queue_nr_zones(q), page);
>> }
>>
>> +static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page)
>> +{
>> + return queue_var_show(queue_max_open_zones(q), page);
>> +}
>> +
>> static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
>> {
>> return queue_var_show((blk_queue_nomerges(q) << 1) |
>> @@ -667,6 +672,11 @@ static struct queue_sysfs_entry queue_nr_zones_entry = {
>> .show = queue_nr_zones_show,
>> };
>>
>> +static struct queue_sysfs_entry queue_max_open_zones_entry = {
>> + .attr = {.name = "max_open_zones", .mode = 0444 },
>> + .show = queue_max_open_zones_show,
>> +};
>> +
>> static struct queue_sysfs_entry queue_nomerges_entry = {
>> .attr = {.name = "nomerges", .mode = 0644 },
>> .show = queue_nomerges_show,
>> @@ -765,6 +775,7 @@ static struct attribute *queue_attrs[] = {
>> &queue_nonrot_entry.attr,
>> &queue_zoned_entry.attr,
>> &queue_nr_zones_entry.attr,
>> + &queue_max_open_zones_entry.attr,
>> &queue_nomerges_entry.attr,
>> &queue_rq_affinity_entry.attr,
>> &queue_iostats_entry.attr,
>> @@ -792,6 +803,10 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
>> (!q->mq_ops || !q->mq_ops->timeout))
>> return 0;
>>
>> + if (attr == &queue_max_open_zones_entry.attr &&
>> + !blk_queue_is_zoned(q))
>> + return 0;
>> +
>> return attr->mode;
>> }
>>
>> diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
>> index c08f6281b614..af156529f3b6 100644
>> --- a/drivers/nvme/host/zns.c
>> +++ b/drivers/nvme/host/zns.c
>> @@ -82,6 +82,7 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>
>> q->limits.zoned = BLK_ZONED_HM;
>> blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>> + blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
>> free_data:
>> kfree(id);
>> return status;
>> diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
>> index 183a20720da9..aa3564139b40 100644
>> --- a/drivers/scsi/sd_zbc.c
>> +++ b/drivers/scsi/sd_zbc.c
>> @@ -717,6 +717,10 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
>> /* The drive satisfies the kernel restrictions: set it up */
>> blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>> blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
>> + if (sdkp->zones_max_open == U32_MAX)
>> + blk_queue_max_open_zones(q, 0);
>> + else
>> + blk_queue_max_open_zones(q, sdkp->zones_max_open);
>
> This is correct only for host-managed drives. Host-aware models define the
> "OPTIMAL NUMBER OF OPEN SEQUENTIAL WRITE PREFERRED ZONES" instead of a maximum
> number of open sequential write required zones.
>
> Since the standard does not actually explicitly define what the value of the
> maximum number of open sequential write required zones should be for a
> host-aware drive, I would suggest to always have the max_open_zones value set to
> 0 for host-aware disks.
>
>> nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
>>
>> /* READ16/WRITE16 is mandatory for ZBC disks */
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index 8fd900998b4e..2f332f00501d 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -520,6 +520,7 @@ struct request_queue {
>> unsigned int nr_zones;
>> unsigned long *conv_zones_bitmap;
>> unsigned long *seq_zones_wlock;
>> + unsigned int max_open_zones;
>> #endif /* CONFIG_BLK_DEV_ZONED */
>>
>> /*
>> @@ -729,6 +730,17 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
>> return true;
>> return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
>> }
>> +
>> +static inline void blk_queue_max_open_zones(struct request_queue *q,
>> + unsigned int max_open_zones)
>> +{
>> + q->max_open_zones = max_open_zones;
>> +}
>> +
>> +static inline unsigned int queue_max_open_zones(const struct request_queue *q)
>> +{
>> + return q->max_open_zones;
>> +}
>> #else /* CONFIG_BLK_DEV_ZONED */
>> static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
>> {
>> @@ -744,6 +756,14 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
>> {
>> return 0;
>> }
>> +static inline void blk_queue_max_open_zones(struct request_queue *q,
>> + unsigned int max_open_zones)
>> +{
>> +}
>
> Why is this one necessary ? For the !CONFIG_BLK_DEV_ZONED case, no driver should
> ever call this function.
>
>> +static inline unsigned int queue_max_open_zones(const struct request_queue *q)
>> +{
>> + return 0;
>> +}
>> #endif /* CONFIG_BLK_DEV_ZONED */
>>
>> static inline bool rq_is_sync(struct request *rq)
>>
>
>


--
Damien Le Moal
Western Digital Research