Re: [RFC PATCH 1/2] block: add simple copy support

From: Johannes Thumshirn
Date: Tue Dec 01 2020 - 04:59:15 EST


On 01/12/2020 08:14, SelvaKumar S wrote:
> +static inline int bio_check_copy_eod(struct bio *bio, sector_t start,
> + sector_t nr_sectors, sector_t maxsector)
> +{
> + if (nr_sectors && maxsector && (nr_sectors > maxsector ||
> + start > maxsector - nr_sectors)) {

Nit: I don't like the line break here, maybe:

if (nr_sectors && maxsector &&
(nr_sectors > maxsector || start > maxsector - nr_sectors)) {

> + handle_bad_sector(bio, maxsector);
> + return -EIO;
> + }
> + return 0;
> +}
> +
> /*
> * Check whether this bio extends beyond the end of the device or partition.
> * This may well happen - the kernel calls bread() without checking the size of
> @@ -737,6 +748,75 @@ static inline int bio_check_eod(struct bio *bio, sector_t maxsector)
> return 0;
> }
>
> +/*
> + * Check for copy limits and remap source ranges if needed.
> + */
> +static inline int blk_check_copy(struct bio *bio)

That function is a bit big to be marked as inline, isn't it?


> +{
> + struct hd_struct *p = NULL;
> + struct request_queue *q = bio->bi_disk->queue;
> + struct blk_copy_payload *payload;
> + unsigned short nr_range;
> + int ret = -EIO;
> + int i, copy_len = 0;
> +
> + rcu_read_lock();
> +
> + if (bio->bi_partno) {
> + p = __disk_get_part(bio->bi_disk, bio->bi_partno);
> + if (unlikely(!p))
> + goto out;
> + if (unlikely(bio_check_ro(bio, p)))
> + goto out;
> + } else {
> + if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
> + goto out;
> + }
> +
> + payload = bio_data(bio);
> + nr_range = payload->copy_range;
> +
> + /* cannot handle copy crossing nr_ranges limit */
> + if (payload->copy_range > q->limits.max_copy_nr_ranges)
> + goto out;
> +
> + for (i = 0; i < nr_range; i++) {
> + copy_len += payload->range[i].len;
> + if (p) {
> + if (bio_check_copy_eod(bio, payload->range[i].src,
> + payload->range[i].len, part_nr_sects_read(p)))
> + goto out;
> + payload->range[i].src += p->start_sect;
> + } else {
> + if (unlikely(bio_check_copy_eod(bio, payload->range[i].src,
> + payload->range[i].len,
> + get_capacity(bio->bi_disk))))
> + goto out;
> + }
> + }
> +
> + /* cannot handle copy more than copy limits */
> + if (copy_len > q->limits.max_copy_sectors)
> + goto out;
> +
> + if (p) {
> + if (unlikely(bio_check_copy_eod(bio, bio->bi_iter.bi_sector, copy_len,
> + part_nr_sects_read(p))))
> + goto out;
> + } else {
> + if (unlikely(bio_check_copy_eod(bio, bio->bi_iter.bi_sector, copy_len,
> + get_capacity(bio->bi_disk))))
> + goto out;
> +
> + }
> +


All these if (p) {} else {} branches make this function a bit hard to follow.

> + if (p)
> + bio->bi_partno = 0;
> + ret = 0;
> +out:
> + rcu_read_unlock();
> + return ret;
> +}
> /*
> * Remap block n of partition p to block n+start(p) of the disk.
> */


> diff --git a/block/blk-lib.c b/block/blk-lib.c
> index e90614fd8d6a..db4947f7014d 100644
> --- a/block/blk-lib.c
> +++ b/block/blk-lib.c
> @@ -150,6 +150,122 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
> }
> EXPORT_SYMBOL(blkdev_issue_discard);
>
> +int __blkdev_issue_copy(struct block_device *bdev, sector_t dest,
> + sector_t nr_srcs, struct range_entry *rlist, gfp_t gfp_mask,
> + int flags, struct bio **biop)
> +{
> + struct request_queue *q = bdev_get_queue(bdev);
> + struct bio *bio;
> + struct blk_copy_payload *payload;
> + unsigned int op;

I don't think op is needed.

> + sector_t bs_mask;
> + sector_t src_sects, len = 0, total_len = 0;
> + int i, ret, total_size;
> +
> + if (!q)
> + return -ENXIO;
> +
> + if (!nr_srcs)
> + return -EINVAL;
> +
> + if (bdev_read_only(bdev))
> + return -EPERM;
> +
> + if (!blk_queue_copy(q))
> + return -EOPNOTSUPP;
> + op = REQ_OP_COPY;
> +
> + bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
> + if (dest & bs_mask)
> + return -EINVAL;
> +
> + payload = kmalloc(sizeof(struct blk_copy_payload) +
> + nr_srcs * sizeof(struct range_entry),
> + GFP_ATOMIC | __GFP_NOWARN);

Please check if the use of struct_size() is possible. Probably even assign
total_size here so you don't need to do the size calculation twice.

> + if (!payload)
> + return -ENOMEM;
> +
> + bio = bio_alloc(gfp_mask, 1);
> + bio->bi_iter.bi_sector = dest;
> + bio_set_dev(bio, bdev);
> + bio_set_op_attrs(bio, op, REQ_NOMERGE);

bio_set_op_attrs() is deprecated, please don't use it.
bio->bi_opf = REQ_OP_COPY | REQ_NOMERGE;

> +
> + payload->dest = dest;
> +
> + for (i = 0; i < nr_srcs; i++) {
> + /* copy payload provided are in bytes */
> + src_sects = rlist[i].src;
> + if (src_sects & bs_mask)
> + return -EINVAL;
> + src_sects = src_sects >> SECTOR_SHIFT;
> +
> + if (len & bs_mask)
> + return -EINVAL;
> + len = rlist[i].len >> SECTOR_SHIFT;
> + if (len > q->limits.max_copy_range_sectors)
> + return -EINVAL;
> +
> + total_len += len;
> +
> + WARN_ON_ONCE((src_sects << 9) > UINT_MAX);
> +
> + payload->range[i].src = src_sects;
> + payload->range[i].len = len;
> + }
> +
> + /* storing # of source ranges */
> + payload->copy_range = i;
> + /* storing copy len so far */
> + payload->copy_size = total_len;
> +
> + total_size = sizeof(struct blk_copy_payload) + nr_srcs * sizeof(struct range_entry);

See above.

> + ret = bio_add_page(bio, virt_to_page(payload), total_size,
> + offset_in_page(payload));
> + if (ret != total_size) {
> + kfree(payload);
> + return -ENOMEM;
> + }
> +
> + *biop = bio;
> + return 0;
> +}
> +EXPORT_SYMBOL(__blkdev_issue_copy);
> +
> +/**
> + * blkdev_issue_copy - queue a copy
> + * @bdev: blockdev to issue copy for
> + * @dest: dest sector
> + * @nr_srcs: number of source ranges to copy
> + * @rlist: list of range entries
> + * @gfp_mask: memory allocation flags (for bio_alloc)
> + * @flags: BLKDEV_COPY_* flags to control behaviour //TODO
> + *
> + * Description:
> + * Issue a copy request for dest sector with source in rlist
> + */
> +int blkdev_issue_copy(struct block_device *bdev, sector_t dest,
> + int nr_srcs, struct range_entry *rlist,
> + gfp_t gfp_mask, unsigned long flags)
> +{
> + struct bio *bio = NULL;
> + int ret;
> +
> + ret = __blkdev_issue_copy(bdev, dest, nr_srcs, rlist, gfp_mask, flags,
> + &bio);
> + if (!ret && bio) {
> + ret = submit_bio_wait(bio);
> + if (ret == -EOPNOTSUPP)
> + ret = 0;
> +
> + kfree(page_address(bio_first_bvec_all(bio)->bv_page) +
> + bio_first_bvec_all(bio)->bv_offset);
> + bio_put(bio);
> + }
> +
> + return ret;

What happens with bio here if __blkdev_issue_copy() returns say -ENOMEM because
bio_add_page() fails?

Also please handle failure not success. Sth along the lines of

ret = __blkdev_issue_copy(bdev, dest, nr_srcs, rlist, gfp_mask, flags,
&bio);
if (ret)
...

ret = submit_bio_wait();
if (ret)
...

...

return ret;
}

> +}
> +EXPORT_SYMBOL(blkdev_issue_copy);