[PATCH 4/5] bio: introduce BIO_FOLL_PIN flag

From: John Hubbard
Date: Sat Aug 22 2020 - 00:21:30 EST


Add a new BIO_FOLL_PIN flag to struct bio, whose "short int" flags field
was full, thus triggering an expansion of the field from 16 to 32
bits. This allows for a nice assertion in bio_release_pages() that the
bio page release mechanism matches the page acquisition mechanism.

Set BIO_FOLL_PIN whenever pin_user_pages_fast() is used, and check for
BIO_FOLL_PIN before using unpin_user_page().

Signed-off-by: John Hubbard <jhubbard@xxxxxxxxxx>
---
block/bio.c | 9 +++++++--
block/blk-map.c | 3 ++-
fs/direct-io.c | 4 ++--
include/linux/blk_types.h | 5 +++--
include/linux/uio.h | 5 +++--
lib/iov_iter.c | 9 +++++++--
6 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 00d548e3c2b8..dd8e85618d5e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -952,6 +952,9 @@ void bio_release_pages(struct bio *bio, bool mark_dirty)
if (bio_flagged(bio, BIO_NO_PAGE_REF))
return;

+ if (WARN_ON_ONCE(!bio_flagged(bio, BIO_FOLL_PIN)))
+ return;
+
bio_for_each_segment_all(bvec, bio, iter_all) {
if (mark_dirty && !PageCompound(bvec->bv_page))
set_page_dirty_lock(bvec->bv_page);
@@ -1009,7 +1012,8 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);

- size = iov_iter_pin_user_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+ size = iov_iter_pin_user_pages(bio, iter, pages, LONG_MAX, nr_pages,
+ &offset);
if (unlikely(size <= 0))
return size ? size : -EFAULT;

@@ -1056,7 +1060,8 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);

- size = iov_iter_pin_user_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+ size = iov_iter_pin_user_pages(bio, iter, pages, LONG_MAX, nr_pages,
+ &offset);
if (unlikely(size <= 0))
return size ? size : -EFAULT;

diff --git a/block/blk-map.c b/block/blk-map.c
index 7a095b4947ea..ddfff2f0b1cb 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -275,7 +275,8 @@ static struct bio *bio_map_user_iov(struct request_queue *q,
size_t offs, added = 0;
int npages;

- bytes = iov_iter_pin_user_pages_alloc(iter, &pages, LONG_MAX, &offs);
+ bytes = iov_iter_pin_user_pages_alloc(bio, iter, &pages,
+ LONG_MAX, &offs);
if (unlikely(bytes <= 0)) {
ret = bytes ? bytes : -EFAULT;
goto out_unmap;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b01c8d003bd3..4d0787ba85eb 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -170,8 +170,8 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
{
ssize_t ret;

- ret = iov_iter_pin_user_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES,
- &sdio->from);
+ ret = iov_iter_pin_user_pages(sdio->bio, sdio->iter, dio->pages,
+ LONG_MAX, DIO_PAGES, &sdio->from);

if (ret < 0 && sdio->blocks_available && (dio->op == REQ_OP_WRITE)) {
struct page *page = ZERO_PAGE(0);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 4ecf4fed171f..d0e0da762af3 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -188,7 +188,7 @@ struct bio {
* top bits REQ_OP. Use
* accessors.
*/
- unsigned short bi_flags; /* status, etc and bvec pool number */
+ unsigned int bi_flags; /* status, etc and bvec pool number */
unsigned short bi_ioprio;
unsigned short bi_write_hint;
blk_status_t bi_status;
@@ -267,6 +267,7 @@ enum {
* of this bio. */
BIO_CGROUP_ACCT, /* has been accounted to a cgroup */
BIO_TRACKED, /* set if bio goes through the rq_qos path */
+ BIO_FOLL_PIN, /* must release pages via unpin_user_pages() */
BIO_FLAG_LAST
};

@@ -285,7 +286,7 @@ enum {
* freed.
*/
#define BVEC_POOL_BITS (3)
-#define BVEC_POOL_OFFSET (16 - BVEC_POOL_BITS)
+#define BVEC_POOL_OFFSET (32 - BVEC_POOL_BITS)
#define BVEC_POOL_IDX(bio) ((bio)->bi_flags >> BVEC_POOL_OFFSET)
#if (1<< BVEC_POOL_BITS) < (BVEC_POOL_NR+1)
# error "BVEC_POOL_BITS is too small"
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 29b0504a27cc..62bcf5e45f2b 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -209,6 +209,7 @@ size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i)
return _copy_to_iter_mcsafe(addr, bytes, i);
}

+struct bio;
size_t iov_iter_zero(size_t bytes, struct iov_iter *);
unsigned long iov_iter_alignment(const struct iov_iter *i);
unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
@@ -229,9 +230,9 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);

-ssize_t iov_iter_pin_user_pages(struct iov_iter *i, struct page **pages,
+ssize_t iov_iter_pin_user_pages(struct bio *bio, struct iov_iter *i, struct page **pages,
size_t maxsize, unsigned int maxpages, size_t *start);
-ssize_t iov_iter_pin_user_pages_alloc(struct iov_iter *i, struct page ***pages,
+ssize_t iov_iter_pin_user_pages_alloc(struct bio *bio, struct iov_iter *i, struct page ***pages,
size_t maxsize, size_t *start);

static inline size_t iov_iter_count(const struct iov_iter *i)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index d818b16d136b..a4bc1b3a3fda 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -3,6 +3,7 @@
#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/uio.h>
+#include <linux/bio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -1309,7 +1310,7 @@ static ssize_t pipe_get_pages(struct iov_iter *i,
return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
}

-ssize_t iov_iter_pin_user_pages(struct iov_iter *i,
+ssize_t iov_iter_pin_user_pages(struct bio *bio, struct iov_iter *i,
struct page **pages, size_t maxsize, unsigned int maxpages,
size_t *start)
{
@@ -1335,6 +1336,8 @@ ssize_t iov_iter_pin_user_pages(struct iov_iter *i,
addr &= ~(PAGE_SIZE - 1);
n = DIV_ROUND_UP(len, PAGE_SIZE);

+ bio_set_flag(bio, BIO_FOLL_PIN);
+
res = pin_user_pages_fast(addr, n,
iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0,
pages);
@@ -1426,7 +1429,7 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
return n;
}

-ssize_t iov_iter_pin_user_pages_alloc(struct iov_iter *i,
+ssize_t iov_iter_pin_user_pages_alloc(struct bio *bio, struct iov_iter *i,
struct page ***pages, size_t maxsize,
size_t *start)
{
@@ -1454,6 +1457,8 @@ ssize_t iov_iter_pin_user_pages_alloc(struct iov_iter *i,
if (!p)
return -ENOMEM;

+ bio_set_flag(bio, BIO_FOLL_PIN);
+
res = pin_user_pages_fast(addr, n,
iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p);
if (unlikely(res < 0)) {
--
2.28.0