Re: [PATCH v6 21/34] 9p: Pin pages rather than ref'ing if appropriate

From: Al Viro
Date: Wed Jan 18 2023 - 21:52:40 EST


On Mon, Jan 16, 2023 at 11:10:32PM +0000, David Howells wrote:

> @@ -310,73 +310,34 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
> struct iov_iter *data,
> int count,
> size_t *offs,
> - int *need_drop,
> + int *cleanup_mode,
> unsigned int gup_flags)
> {
> int nr_pages;
> int err;
> + int n;
>
> if (!iov_iter_count(data))
> return 0;
>
> - if (!iov_iter_is_kvec(data)) {
> - int n;
> - /*
> - * We allow only p9_max_pages pinned. We wait for the
> - * Other zc request to finish here
> - */
> - if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
> - err = wait_event_killable(vp_wq,
> - (atomic_read(&vp_pinned) < chan->p9_max_pages));
> - if (err == -ERESTARTSYS)
> - return err;
> - }
> - n = iov_iter_get_pages_alloc(data, pages, count, offs,
> - gup_flags);
> - if (n < 0)
> - return n;
> - *need_drop = 1;
> - nr_pages = DIV_ROUND_UP(n + *offs, PAGE_SIZE);
> - atomic_add(nr_pages, &vp_pinned);
> - return n;
> - } else {
> - /* kernel buffer, no need to pin pages */
> - int index;
> - size_t len;
> - void *p;
> -
> - /* we'd already checked that it's non-empty */
> - while (1) {
> - len = iov_iter_single_seg_count(data);
> - if (likely(len)) {
> - p = data->kvec->iov_base + data->iov_offset;
> - break;
> - }
> - iov_iter_advance(data, 0);
> - }
> - if (len > count)
> - len = count;
> -
> - nr_pages = DIV_ROUND_UP((unsigned long)p + len, PAGE_SIZE) -
> - (unsigned long)p / PAGE_SIZE;
> -
> - *pages = kmalloc_array(nr_pages, sizeof(struct page *),
> - GFP_NOFS);
> - if (!*pages)
> - return -ENOMEM;
> -
> - *need_drop = 0;
> - p -= (*offs = offset_in_page(p));
> - for (index = 0; index < nr_pages; index++) {
> - if (is_vmalloc_addr(p))
> - (*pages)[index] = vmalloc_to_page(p);
> - else
> - (*pages)[index] = kmap_to_page(p);
> - p += PAGE_SIZE;
> - }
> - iov_iter_advance(data, len);
> - return len;
> + /*
> + * We allow only p9_max_pages pinned. We wait for the
> + * Other zc request to finish here
> + */
> + if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
> + err = wait_event_killable(vp_wq,
> + (atomic_read(&vp_pinned) < chan->p9_max_pages));
> + if (err == -ERESTARTSYS)
> + return err;
> }
> +
> + n = iov_iter_extract_pages(data, pages, count, offs, gup_flags);

Wait a sec; just how would that work for ITER_KVEC? AFAICS, in your
tree that would blow with -EFAULT...

Yup; in p9_client_readdir() in your tree:

net/9p/client.c:2057: iov_iter_kvec(&to, ITER_DEST, &kv, 1, count);

net/9p/client.c:2077: req = p9_client_zc_rpc(clnt, P9_TREADDIR, &to, NULL, rsize, 0,
net/9p/client.c:2078: 11, "dqd", fid->fid, offset, rsize);

where
net/9p/client.c:799: err = c->trans_mod->zc_request(c, req, uidata, uodata,
net/9p/client.c:800: inlen, olen, in_hdrlen);

and in p9_virtio_zc_request(), which is a possible ->zc_request() instance
net/9p/trans_virtio.c:402: int n = p9_get_mapped_pages(chan, &out_pages, uodata,
net/9p/trans_virtio.c:403: outlen, &offs, &cleanup_mode,
net/9p/trans_virtio.c:404: FOLL_DEST_BUF);

with p9_get_mapped_pages() hitting
net/9p/trans_virtio.c:334: n = iov_iter_extract_pages(data, pages, count, offs, gup_flags);
net/9p/trans_virtio.c:335: if (n < 0)
net/9p/trans_virtio.c:336: return n;

and in iov_iter_extract_get_pages()
lib/iov_iter.c:2250: if (likely(user_backed_iter(i)))
lib/iov_iter.c:2251: return iov_iter_extract_user_pages(i, pages, maxsize,
lib/iov_iter.c:2252: maxpages, gup_flags,
lib/iov_iter.c:2253: offset0);
lib/iov_iter.c:2254: if (iov_iter_is_bvec(i))
lib/iov_iter.c:2255: return iov_iter_extract_bvec_pages(i, pages, maxsize,
lib/iov_iter.c:2256: maxpages, gup_flags,
lib/iov_iter.c:2257: offset0);
lib/iov_iter.c:2258: if (iov_iter_is_pipe(i))
lib/iov_iter.c:2259: return iov_iter_extract_pipe_pages(i, pages, maxsize,
lib/iov_iter.c:2260: maxpages, gup_flags,
lib/iov_iter.c:2261: offset0);
lib/iov_iter.c:2262: if (iov_iter_is_xarray(i))
lib/iov_iter.c:2263: return iov_iter_extract_xarray_pages(i, pages, maxsize,
lib/iov_iter.c:2264: maxpages, gup_flags,
lib/iov_iter.c:2265: offset0);
lib/iov_iter.c:2266: return -EFAULT;

All quoted lines by your
https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/tree/
How could that possibly work?