Re: linux-stable-5.10-y CVE-2022-1508 of io_uring module

From: Pavel Begunkov
Date: Sat May 07 2022 - 05:17:42 EST


On 5/6/22 19:22, Jens Axboe wrote:
On 5/6/22 10:15 AM, Jens Axboe wrote:
On 5/6/22 9:57 AM, Pavel Begunkov wrote:
On 5/6/22 03:16, Jens Axboe wrote:
On 5/5/22 8:11 AM, Guo Xuenan wrote:
Hi, Pavel & Jens

CVE-2022-1508[1] contains an patch[2] of io_uring. As Jones reported,
it is not enough only apply [2] to stable-5.10.
Io_uring is very valuable and active module of linux kernel.
I've tried to apply these two patches[3] [4] to my local 5.10 code, I
found my understanding of io_uring is not enough to resolve all conflicts.

Since 5.10 is an important stable branch of linux, we would appreciate
your help in solving this problem.

Yes, this really needs to get buttoned up for 5.10. I seem to recall
there was a reproducer for this that was somewhat saner than the
syzbot one (which doesn't do anything for me). Pavel, do you have one?

No, it was the only repro and was triggering the problem
just fine back then

I modified it a bit and I can now trigger it.

Pavel, why don't we just keep it really simple and just always save the
iter state in read/write, and use the restore instead of the revert?

The problem here is where we're doing revert. If it's done deep in
the stack and then while unwinding someone decides to revert it again,
e.g. blkdev_read_iter(), we're screwed.

The last attempt was backporting 20+ patches that would move revert
into io_read/io_write, i.e. REQ_F_REISSUE, back that failed some of
your tests back then. (was it read retry tests iirc?)


Then it's just a trivial backport of ff6165b2d7f6 and the trivial
io_uring patch after that.


diff --git a/fs/io_uring.c b/fs/io_uring.c
index ab9290ab4cae..138f204db72a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3429,6 +3429,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter __iter, *iter = &__iter;
struct io_async_rw *rw = req->async_data;
+ struct iov_iter_state iter_state;
ssize_t io_size, ret, ret2;
bool no_async;
@@ -3458,6 +3459,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
if (unlikely(ret))
goto out_free;
+ iov_iter_save_state(iter, &iter_state);
ret = io_iter_do_read(req, iter);
if (!ret) {
@@ -3473,7 +3475,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
if (req->file->f_flags & O_NONBLOCK)
goto done;
/* some cases will consume bytes even on error returns */
- iov_iter_revert(iter, io_size - iov_iter_count(iter));
+ iov_iter_restore(iter, &iter_state);
ret = 0;
goto copy_iov;
} else if (ret < 0) {
@@ -3557,6 +3559,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter __iter, *iter = &__iter;
struct io_async_rw *rw = req->async_data;
+ struct iov_iter_state iter_state;
ssize_t ret, ret2, io_size;
if (rw)
@@ -3574,6 +3577,8 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
else
kiocb->ki_flags |= IOCB_NOWAIT;
+ iov_iter_save_state(iter, &iter_state);
+
/* If the file doesn't support async, just async punt */
if (force_nonblock && !io_file_supports_async(req->file, WRITE))
goto copy_iov;
@@ -3626,7 +3631,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
} else {
copy_iov:
/* some cases will consume bytes even on error returns */
- iov_iter_revert(iter, io_size - iov_iter_count(iter));
+ iov_iter_restore(iter, &iter_state);
ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
if (!ret)
return -EAGAIN;
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 27ff8eb786dc..cedb68e49e4f 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -26,6 +26,12 @@ enum iter_type {
ITER_DISCARD = 64,
};
+struct iov_iter_state {
+ size_t iov_offset;
+ size_t count;
+ unsigned long nr_segs;
+};
+
struct iov_iter {
/*
* Bit 0 is the read/write bit, set if we're writing.
@@ -55,6 +61,14 @@ static inline enum iter_type iov_iter_type(const struct iov_iter *i)
return i->type & ~(READ | WRITE);
}
+static inline void iov_iter_save_state(struct iov_iter *iter,
+ struct iov_iter_state *state)
+{
+ state->iov_offset = iter->iov_offset;
+ state->count = iter->count;
+ state->nr_segs = iter->nr_segs;
+}
+
static inline bool iter_is_iovec(const struct iov_iter *i)
{
return iov_iter_type(i) == ITER_IOVEC;
@@ -226,6 +240,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
size_t maxsize, size_t *start);
int iov_iter_npages(const struct iov_iter *i, int maxpages);
+void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state);
const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 1b0a349fbcd9..00a66229d182 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1857,3 +1857,39 @@ int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
return err;
}
EXPORT_SYMBOL(iov_iter_for_each_range);
+
+/**
+ * iov_iter_restore() - Restore a &struct iov_iter to the same state as when
+ * iov_iter_save_state() was called.
+ *
+ * @i: &struct iov_iter to restore
+ * @state: state to restore from
+ *
+ * Used after iov_iter_save_state() to bring restore @i, if operations may
+ * have advanced it.
+ *
+ * Note: only works on ITER_IOVEC, ITER_BVEC, and ITER_KVEC
+ */
+void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
+{
+ if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) &&
+ !iov_iter_is_kvec(i))
+ return;
+ i->iov_offset = state->iov_offset;
+ i->count = state->count;
+ /*
+ * For the *vec iters, nr_segs + iov is constant - if we increment
+ * the vec, then we also decrement the nr_segs count. Hence we don't
+ * need to track both of these, just one is enough and we can deduct
+ * the other from that. ITER_KVEC and ITER_IOVEC are the same struct
+ * size, so we can just increment the iov pointer as they are unionzed.
+ * ITER_BVEC _may_ be the same size on some archs, but on others it is
+ * not. Be safe and handle it separately.
+ */
+ BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
+ if (iov_iter_is_bvec(i))
+ i->bvec -= state->nr_segs - i->nr_segs;
+ else
+ i->iov -= state->nr_segs - i->nr_segs;
+ i->nr_segs = state->nr_segs;
+}


--
Pavel Begunkov