Re: still nfs problems [Was: Linux 2.6.37-rc8]

From: Uwe Kleine-König
Date: Mon Jan 10 2011 - 05:51:06 EST


Hi Trond,

On Sat, Jan 08, 2011 at 06:15:51PM -0500, Trond Myklebust wrote:
> -----------------------------------------------------------------------------------
It would be great if you could add a "8<" in the line above next time.
Then git-am -c does the right thing (at least I think so).

> From 8b2e60cef5c65eef41ab61286f62dec6bfb1ac27 Mon Sep 17 00:00:00 2001
> From: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx>
> Date: Sat, 8 Jan 2011 17:45:38 -0500
> Subject: [PATCH] NFS: Don't use vm_map_ram() in readdir
>
> vm_map_ram() is not available on NOMMU platforms, and causes trouble
> on incoherrent architectures such as ARM when we access the page data
> through both the direct and the virtual mapping.
>
> The alternative is to use the direct mapping to access page data
> for the case when we are not crossing a page boundary, but to copy
> the data into a linear scratch buffer when we are accessing data
> that spans page boundaries.
>
> Signed-off-by: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx>
Tested-by: Uwe Kleine-König <u.kleine-koenig@xxxxxxxxxxxxxx>

on tx28.

Thanks
Uwe

> ---
> fs/nfs/dir.c | 44 ++++++-------
> fs/nfs/nfs2xdr.c | 6 --
> fs/nfs/nfs3xdr.c | 6 --
> fs/nfs/nfs4xdr.c | 6 --
> include/linux/sunrpc/xdr.h | 4 +-
> net/sunrpc/xdr.c | 155 +++++++++++++++++++++++++++++++++++---------
> 6 files changed, 148 insertions(+), 73 deletions(-)
>
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index 996dd89..0108cf4 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -33,7 +33,6 @@
> #include <linux/namei.h>
> #include <linux/mount.h>
> #include <linux/sched.h>
> -#include <linux/vmalloc.h>
> #include <linux/kmemleak.h>
>
> #include "delegation.h"
> @@ -459,25 +458,26 @@ out:
> /* Perform conversion from xdr to cache array */
> static
> int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
> - void *xdr_page, struct page *page, unsigned int buflen)
> + struct page **xdr_pages, struct page *page, unsigned int buflen)
> {
> struct xdr_stream stream;
> - struct xdr_buf buf;
> - __be32 *ptr = xdr_page;
> + struct xdr_buf buf = {
> + .pages = xdr_pages,
> + .page_len = buflen,
> + .buflen = buflen,
> + .len = buflen,
> + };
> + struct page *scratch;
> struct nfs_cache_array *array;
> unsigned int count = 0;
> int status;
>
> - buf.head->iov_base = xdr_page;
> - buf.head->iov_len = buflen;
> - buf.tail->iov_len = 0;
> - buf.page_base = 0;
> - buf.page_len = 0;
> - buf.buflen = buf.head->iov_len;
> - buf.len = buf.head->iov_len;
> -
> - xdr_init_decode(&stream, &buf, ptr);
> + scratch = alloc_page(GFP_KERNEL);
> + if (scratch == NULL)
> + return -ENOMEM;
>
> + xdr_init_decode(&stream, &buf, NULL);
> + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
>
> do {
> status = xdr_decode(desc, entry, &stream);
> @@ -506,6 +506,8 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
> } else
> status = PTR_ERR(array);
> }
> +
> + put_page(scratch);
> return status;
> }
>
> @@ -521,7 +523,6 @@ static
> void nfs_readdir_free_large_page(void *ptr, struct page **pages,
> unsigned int npages)
> {
> - vm_unmap_ram(ptr, npages);
> nfs_readdir_free_pagearray(pages, npages);
> }
>
> @@ -530,9 +531,8 @@ void nfs_readdir_free_large_page(void *ptr, struct page **pages,
> * to nfs_readdir_free_large_page
> */
> static
> -void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
> +int nfs_readdir_large_page(struct page **pages, unsigned int npages)
> {
> - void *ptr;
> unsigned int i;
>
> for (i = 0; i < npages; i++) {
> @@ -541,13 +541,11 @@ void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
> goto out_freepages;
> pages[i] = page;
> }
> + return 0;
>
> - ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
> - if (!IS_ERR_OR_NULL(ptr))
> - return ptr;
> out_freepages:
> nfs_readdir_free_pagearray(pages, i);
> - return NULL;
> + return -ENOMEM;
> }
>
> static
> @@ -577,8 +575,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
> memset(array, 0, sizeof(struct nfs_cache_array));
> array->eof_index = -1;
>
> - pages_ptr = nfs_readdir_large_page(pages, array_size);
> - if (!pages_ptr)
> + status = nfs_readdir_large_page(pages, array_size);
> + if (status < 0)
> goto out_release_array;
> do {
> unsigned int pglen;
> @@ -587,7 +585,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
> if (status < 0)
> break;
> pglen = status;
> - status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen);
> + status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
> if (status < 0) {
> if (status == -ENOSPC)
> status = 0;
> diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
> index 5914a19..b382a1b 100644
> --- a/fs/nfs/nfs2xdr.c
> +++ b/fs/nfs/nfs2xdr.c
> @@ -487,12 +487,6 @@ nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_se
>
> entry->d_type = DT_UNKNOWN;
>
> - p = xdr_inline_peek(xdr, 8);
> - if (p != NULL)
> - entry->eof = !p[0] && p[1];
> - else
> - entry->eof = 0;
> -
> return p;
>
> out_overflow:
> diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
> index f6cc60f..ba91236 100644
> --- a/fs/nfs/nfs3xdr.c
> +++ b/fs/nfs/nfs3xdr.c
> @@ -647,12 +647,6 @@ nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_s
> memset((u8*)(entry->fh), 0, sizeof(*entry->fh));
> }
>
> - p = xdr_inline_peek(xdr, 8);
> - if (p != NULL)
> - entry->eof = !p[0] && p[1];
> - else
> - entry->eof = 0;
> -
> return p;
>
> out_overflow:
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index 9f1826b..0662a98 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
> @@ -6215,12 +6215,6 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
> if (verify_attr_len(xdr, p, len) < 0)
> goto out_overflow;
>
> - p = xdr_inline_peek(xdr, 8);
> - if (p != NULL)
> - entry->eof = !p[0] && p[1];
> - else
> - entry->eof = 0;
> -
> return p;
>
> out_overflow:
> diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
> index 498ab93..7783c68 100644
> --- a/include/linux/sunrpc/xdr.h
> +++ b/include/linux/sunrpc/xdr.h
> @@ -201,6 +201,8 @@ struct xdr_stream {
>
> __be32 *end; /* end of available buffer space */
> struct kvec *iov; /* pointer to the current kvec */
> + struct kvec scratch; /* Scratch buffer */
> + struct page **page_ptr; /* pointer to the current page */
> };
>
> extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
> @@ -208,7 +210,7 @@ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
> extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
> unsigned int base, unsigned int len);
> extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
> -extern __be32 *xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes);
> +extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen);
> extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
> extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
> extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
> index cd9e841..679cd67 100644
> --- a/net/sunrpc/xdr.c
> +++ b/net/sunrpc/xdr.c
> @@ -552,6 +552,74 @@ void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int b
> }
> EXPORT_SYMBOL_GPL(xdr_write_pages);
>
> +static void xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov,
> + __be32 *p, unsigned int len)
> +{
> + if (len > iov->iov_len)
> + len = iov->iov_len;
> + if (p == NULL)
> + p = (__be32*)iov->iov_base;
> + xdr->p = p;
> + xdr->end = (__be32*)(iov->iov_base + len);
> + xdr->iov = iov;
> + xdr->page_ptr = NULL;
> +}
> +
> +static int xdr_set_page_base(struct xdr_stream *xdr,
> + unsigned int base, unsigned int len)
> +{
> + unsigned int pgnr;
> + unsigned int maxlen;
> + unsigned int pgoff;
> + unsigned int pgend;
> + void *kaddr;
> +
> + maxlen = xdr->buf->page_len;
> + if (base >= maxlen)
> + return -EINVAL;
> + maxlen -= base;
> + if (len > maxlen)
> + len = maxlen;
> +
> + base += xdr->buf->page_base;
> +
> + pgnr = base >> PAGE_SHIFT;
> + xdr->page_ptr = &xdr->buf->pages[pgnr];
> + kaddr = page_address(*xdr->page_ptr);
> +
> + pgoff = base & ~PAGE_MASK;
> + xdr->p = (__be32*)(kaddr + pgoff);
> +
> + pgend = pgoff + len;
> + if (pgend > PAGE_SIZE)
> + pgend = PAGE_SIZE;
> + xdr->end = (__be32*)(kaddr + pgend);
> + xdr->iov = NULL;
> + return 0;
> +}
> +
> +static void xdr_set_next_page(struct xdr_stream *xdr)
> +{
> + unsigned int newbase;
> +
> + newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT;
> + newbase -= xdr->buf->page_base;
> +
> + if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0)
> + xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
> +}
> +
> +static bool xdr_set_next_buffer(struct xdr_stream *xdr)
> +{
> + if (xdr->page_ptr != NULL)
> + xdr_set_next_page(xdr);
> + else if (xdr->iov == xdr->buf->head) {
> + if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0)
> + xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
> + }
> + return xdr->p != xdr->end;
> +}
> +
> /**
> * xdr_init_decode - Initialize an xdr_stream for decoding data.
> * @xdr: pointer to xdr_stream struct
> @@ -560,41 +628,67 @@ EXPORT_SYMBOL_GPL(xdr_write_pages);
> */
> void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
> {
> - struct kvec *iov = buf->head;
> - unsigned int len = iov->iov_len;
> -
> - if (len > buf->len)
> - len = buf->len;
> xdr->buf = buf;
> - xdr->iov = iov;
> - xdr->p = p;
> - xdr->end = (__be32 *)((char *)iov->iov_base + len);
> + xdr->scratch.iov_base = NULL;
> + xdr->scratch.iov_len = 0;
> + if (buf->head[0].iov_len != 0)
> + xdr_set_iov(xdr, buf->head, p, buf->len);
> + else if (buf->page_len != 0)
> + xdr_set_page_base(xdr, 0, buf->len);
> }
> EXPORT_SYMBOL_GPL(xdr_init_decode);
>
> -/**
> - * xdr_inline_peek - Allow read-ahead in the XDR data stream
> - * @xdr: pointer to xdr_stream struct
> - * @nbytes: number of bytes of data to decode
> - *
> - * Check if the input buffer is long enough to enable us to decode
> - * 'nbytes' more bytes of data starting at the current position.
> - * If so return the current pointer without updating the current
> - * pointer position.
> - */
> -__be32 * xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes)
> +static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
> {
> __be32 *p = xdr->p;
> __be32 *q = p + XDR_QUADLEN(nbytes);
>
> if (unlikely(q > xdr->end || q < p))
> return NULL;
> + xdr->p = q;
> return p;
> }
> -EXPORT_SYMBOL_GPL(xdr_inline_peek);
>
> /**
> - * xdr_inline_decode - Retrieve non-page XDR data to decode
> + * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data.
> + * @xdr: pointer to xdr_stream struct
> + * @buf: pointer to an empty buffer
> + * @buflen: size of 'buf'
> + *
> + * The scratch buffer is used when decoding from an array of pages.
> + * If an xdr_inline_decode() call spans across page boundaries, then
> + * we copy the data into the scratch buffer in order to allow linear
> + * access.
> + */
> +void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen)
> +{
> + xdr->scratch.iov_base = buf;
> + xdr->scratch.iov_len = buflen;
> +}
> +EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer);
> +
> +static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes)
> +{
> + __be32 *p;
> + void *cpdest = xdr->scratch.iov_base;
> + size_t cplen = (char *)xdr->end - (char *)xdr->p;
> +
> + if (nbytes > xdr->scratch.iov_len)
> + return NULL;
> + memcpy(cpdest, xdr->p, cplen);
> + cpdest += cplen;
> + nbytes -= cplen;
> + if (!xdr_set_next_buffer(xdr))
> + return NULL;
> + p = __xdr_inline_decode(xdr, nbytes);
> + if (p == NULL)
> + return NULL;
> + memcpy(cpdest, p, nbytes);
> + return xdr->scratch.iov_base;
> +}
> +
> +/**
> + * xdr_inline_decode - Retrieve XDR data to decode
> * @xdr: pointer to xdr_stream struct
> * @nbytes: number of bytes of data to decode
> *
> @@ -605,13 +699,16 @@ EXPORT_SYMBOL_GPL(xdr_inline_peek);
> */
> __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
> {
> - __be32 *p = xdr->p;
> - __be32 *q = p + XDR_QUADLEN(nbytes);
> + __be32 *p;
>
> - if (unlikely(q > xdr->end || q < p))
> + if (nbytes == 0)
> + return xdr->p;
> + if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr))
> return NULL;
> - xdr->p = q;
> - return p;
> + p = __xdr_inline_decode(xdr, nbytes);
> + if (p != NULL)
> + return p;
> + return xdr_copy_to_scratch(xdr, nbytes);
> }
> EXPORT_SYMBOL_GPL(xdr_inline_decode);
>
> @@ -671,16 +768,12 @@ EXPORT_SYMBOL_GPL(xdr_read_pages);
> */
> void xdr_enter_page(struct xdr_stream *xdr, unsigned int len)
> {
> - char * kaddr = page_address(xdr->buf->pages[0]);
> xdr_read_pages(xdr, len);
> /*
> * Position current pointer at beginning of tail, and
> * set remaining message length.
> */
> - if (len > PAGE_CACHE_SIZE - xdr->buf->page_base)
> - len = PAGE_CACHE_SIZE - xdr->buf->page_base;
> - xdr->p = (__be32 *)(kaddr + xdr->buf->page_base);
> - xdr->end = (__be32 *)((char *)xdr->p + len);
> + xdr_set_page_base(xdr, 0, len);
> }
> EXPORT_SYMBOL_GPL(xdr_enter_page);
>
> --
> 1.7.3.4
>
>
>
> --
> Trond Myklebust
> Linux NFS client maintainer
>
> NetApp
> Trond.Myklebust@xxxxxxxxxx
> www.netapp.com
>
>

--
Pengutronix e.K. | Uwe Kleine-König |
Industrial Linux Solutions | http://www.pengutronix.de/ |
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/