Re: [PATCH v4 6/6] io_uring: add support for zone-append

From: Kanchan Joshi
Date: Fri Jul 31 2020 - 11:08:05 EST


On Fri, Jul 31, 2020 at 6:38 PM hch@xxxxxxxxxxxxx <hch@xxxxxxxxxxxxx> wrote:
>
> And FYI, this is what I'd do for a hacky aio-only prototype (untested):
>
>
> diff --git a/fs/aio.c b/fs/aio.c
> index 91e7cc4a9f179b..42b1934e38758b 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -1438,7 +1438,10 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
> }
>
> iocb->ki_res.res = res;
> - iocb->ki_res.res2 = res2;
> + if ((kiocb->ki_flags & IOCB_REPORT_OFFSET) && res > 0)
> + iocb->ki_res.res2 = kiocb->ki_pos - res;
> + else
> + iocb->ki_res.res2 = res2;
> iocb_put(iocb);
> }
>
> @@ -1452,6 +1455,8 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
> req->ki_flags = iocb_flags(req->ki_filp);
> if (iocb->aio_flags & IOCB_FLAG_RESFD)
> req->ki_flags |= IOCB_EVENTFD;
> + if (iocb->aio_flags & IOCB_FLAG_REPORT_OFFSET)
> + req->ki_flags |= IOCB_REPORT_OFFSET;
> req->ki_hint = ki_hint_validate(file_write_hint(req->ki_filp));
> if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {
> /*
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index f5abba86107d86..522b0a3437d420 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -316,6 +316,7 @@ enum rw_hint {
> #define IOCB_WRITE (1 << 6)
> #define IOCB_NOWAIT (1 << 7)
> #define IOCB_NOIO (1 << 9)
> +#define IOCB_REPORT_OFFSET (1 << 10)
>
> struct kiocb {
> struct file *ki_filp;
> diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
> index 8387e0af0f768a..e4313d7aa3b7e7 100644
> --- a/include/uapi/linux/aio_abi.h
> +++ b/include/uapi/linux/aio_abi.h
> @@ -55,6 +55,7 @@ enum {
> */
> #define IOCB_FLAG_RESFD (1 << 0)
> #define IOCB_FLAG_IOPRIO (1 << 1)
> +#define IOCB_FLAG_REPORT_OFFSET (1 << 2)
>
> /* read() from /dev/aio returns these structures. */
> struct io_event {

Looks good, but it drops io_uring.
How about two flags -
1. RWF_REPORT_OFFSET_DIRECT (only for aio) ----> aio fails the second one
2. RWF_REPORT_OFFSET_INDIRECT (for io_uring) ----> io_uring fails the first one
Since these are RWF flags, they can also be used by other sync/async
transports in the future if need be.
Either of these flags will set a single IOCB_REPORT_OFFSET, which can be
used by FS/Block etc. (they don't have to worry about how io_uring/aio
sends it up).

This is what I mean in code -

diff --git a/fs/aio.c b/fs/aio.c
index 91e7cc4a9f17..307dfbfb04f7 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1472,6 +1472,11 @@ static int aio_prep_rw(struct kiocb *req, const
struct iocb *iocb)
ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
if (unlikely(ret))
return ret;
+ /* support only direct offset */
+ if (unlikely(iocb->aio_rw_flags & RWF_REPORT_OFFSET_INDIRECT))
+ return -EOPNOTSUPP;
+
req->ki_flags &= ~IOCB_HIPRI; /* no one is going to poll for this I/O */
return 0;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3e406bc1f855..5fa21644251f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2451,6 +2451,7 @@ static int io_prep_rw(struct io_kiocb *req,
const struct io_uring_sqe *sqe,
struct kiocb *kiocb = &req->rw.kiocb;
unsigned ioprio;
int ret;
+ rwf_t rw_flags;

if (S_ISREG(file_inode(req->file)->i_mode))
req->flags |= REQ_F_ISREG;
@@ -2462,9 +2463,13 @@ static int io_prep_rw(struct io_kiocb *req,
const struct io_uring_sqe *sqe,
}
kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
- ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
+ rw_flags = READ_ONCE(sqe->rw_flags);
+ ret = kiocb_set_rw_flags(kiocb, rw_flags);
if (unlikely(ret))
return ret;
+ /* support only indirect offset */
+ if (unlikely(rw_flags & RWF_REPORT_OFFSET_DIRECT))
+ return -EOPNOTSUPP;

ioprio = READ_ONCE(sqe->ioprio);
if (ioprio) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8a00ba99284e..fe2f1f5c5d33 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3296,8 +3296,17 @@ static inline int kiocb_set_rw_flags(struct
kiocb *ki, rwf_t flags)
ki->ki_flags |= IOCB_DSYNC;
if (flags & RWF_SYNC)
ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
- if (flags & RWF_APPEND)
+ if (flags & RWF_APPEND) {
ki->ki_flags |= IOCB_APPEND;
+ /*
+ * 1. These flags do not make sense without RWF_APPEND
+ * 2. RWF_REPORT_OFFSET_DIRECT = report result directly (for aio)
+ * 3. RWF_REPORT_OFFSET_INDIRECT = use pointer (for io_uring)
+ * */
+ if (flags & RWF_REPORT_OFFSET_DIRECT ||
+ flags & RWF_REPORT_OFFSET_INDIRECT)
+ ki->ki_flags |= IOCB_REPORT_OFFSET;