[PATCH v6 22/34] nfs: Pin pages rather than ref'ing if appropriate

From: David Howells
Date: Mon Jan 16 2023 - 18:16:30 EST


Convert the NFS direct I/O code to use iov_iter_extract_pages() instead of
iov_iter_get_pages_alloc(). Depending on the type of iterator, this will
pin the pages or leave them unaltered rather than getting a ref on them.

The pages need to be pinned for DIO-read, rather than having refs taken on
them, to prevent VM copy-on-write from malfunctioning during a concurrent
fork() (the result of the I/O would otherwise end up visible only to the
child process and not to the parent).

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
cc: Anna Schumaker <anna@xxxxxxxxxx>
cc: Jeff Layton <jlayton@xxxxxxxxxx>
cc: linux-nfs@xxxxxxxxxxxxxxx
---

fs/nfs/direct.c | 32 ++++++++++++++++++--------------
1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 42af84685f20..4a3108db2cb6 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -142,11 +142,15 @@ int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
return 0;
}

-static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
+static void nfs_direct_release_pages(struct page **pages, unsigned int npages,
+ unsigned int cleanup_mode)
{
unsigned int i;
- for (i = 0; i < npages; i++)
- put_page(pages[i]);
+
+ if (cleanup_mode) {
+ for (i = 0; i < npages; i++)
+ page_put_unpin(pages[i], cleanup_mode);
+ }
}

void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
@@ -327,17 +331,16 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
inode_dio_begin(inode);

while (iov_iter_count(iter)) {
- struct page **pagevec;
+ struct page **pagevec = NULL;
size_t bytes;
size_t pgbase;
unsigned npages, i;

- result = iov_iter_get_pages_alloc(iter, &pagevec,
- rsize, &pgbase,
- FOLL_DEST_BUF);
+ result = iov_iter_extract_pages(iter, &pagevec, rsize, INT_MAX,
+ FOLL_DEST_BUF, &pgbase);
if (result < 0)
break;
-
+
bytes = result;
npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
for (i = 0; i < npages; i++) {
@@ -363,7 +366,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
pos += req_len;
dreq->bytes_left -= req_len;
}
- nfs_direct_release_pages(pagevec, npages);
+ nfs_direct_release_pages(pagevec, npages,
+ iov_iter_extract_mode(iter, FOLL_DEST_BUF));
kvfree(pagevec);
if (result < 0)
break;
@@ -787,14 +791,13 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,

NFS_I(inode)->write_io += iov_iter_count(iter);
while (iov_iter_count(iter)) {
- struct page **pagevec;
+ struct page **pagevec = NULL;
size_t bytes;
size_t pgbase;
unsigned npages, i;

- result = iov_iter_get_pages_alloc(iter, &pagevec,
- wsize, &pgbase,
- FOLL_SOURCE_BUF);
+ result = iov_iter_extract_pages(iter, &pagevec, wsize, INT_MAX,
+ FOLL_SOURCE_BUF, &pgbase);
if (result < 0)
break;

@@ -831,7 +834,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
pos += req_len;
dreq->bytes_left -= req_len;
}
- nfs_direct_release_pages(pagevec, npages);
+ nfs_direct_release_pages(pagevec, npages,
+ iov_iter_extract_mode(iter, FOLL_SOURCE_BUF));
kvfree(pagevec);
if (result < 0)
break;