Re: still nfs problems [Was: Linux 2.6.37-rc8]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Trond,

On Sat, Jan 08, 2011 at 06:15:51PM -0500, Trond Myklebust wrote:
> ----------------------------------------------------------------------------------- 
It would be great if you could add a "8<" in the line above next time.
Then git-am -c does the right thing (at least I think so).

> From 8b2e60cef5c65eef41ab61286f62dec6bfb1ac27 Mon Sep 17 00:00:00 2001
> From: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx>
> Date: Sat, 8 Jan 2011 17:45:38 -0500
> Subject: [PATCH] NFS: Don't use vm_map_ram() in readdir
> 
> vm_map_ram() is not available on NOMMU platforms, and causes trouble
> on incoherrent architectures such as ARM when we access the page data
> through both the direct and the virtual mapping.
> 
> The alternative is to use the direct mapping to access page data
> for the case when we are not crossing a page boundary, but to copy
> the data into a linear scratch buffer when we are accessing data
> that spans page boundaries.
> 
> Signed-off-by: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx>
Tested-by: Uwe Kleine-König <u.kleine-koenig@xxxxxxxxxxxxxx>

on tx28.

Thanks
Uwe

> ---
>  fs/nfs/dir.c               |   44 ++++++-------
>  fs/nfs/nfs2xdr.c           |    6 --
>  fs/nfs/nfs3xdr.c           |    6 --
>  fs/nfs/nfs4xdr.c           |    6 --
>  include/linux/sunrpc/xdr.h |    4 +-
>  net/sunrpc/xdr.c           |  155 +++++++++++++++++++++++++++++++++++---------
>  6 files changed, 148 insertions(+), 73 deletions(-)
> 
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index 996dd89..0108cf4 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -33,7 +33,6 @@
>  #include <linux/namei.h>
>  #include <linux/mount.h>
>  #include <linux/sched.h>
> -#include <linux/vmalloc.h>
>  #include <linux/kmemleak.h>
>  
>  #include "delegation.h"
> @@ -459,25 +458,26 @@ out:
>  /* Perform conversion from xdr to cache array */
>  static
>  int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
> -				void *xdr_page, struct page *page, unsigned int buflen)
> +				struct page **xdr_pages, struct page *page, unsigned int buflen)
>  {
>  	struct xdr_stream stream;
> -	struct xdr_buf buf;
> -	__be32 *ptr = xdr_page;
> +	struct xdr_buf buf = {
> +		.pages = xdr_pages,
> +		.page_len = buflen,
> +		.buflen = buflen,
> +		.len = buflen,
> +	};
> +	struct page *scratch;
>  	struct nfs_cache_array *array;
>  	unsigned int count = 0;
>  	int status;
>  
> -	buf.head->iov_base = xdr_page;
> -	buf.head->iov_len = buflen;
> -	buf.tail->iov_len = 0;
> -	buf.page_base = 0;
> -	buf.page_len = 0;
> -	buf.buflen = buf.head->iov_len;
> -	buf.len = buf.head->iov_len;
> -
> -	xdr_init_decode(&stream, &buf, ptr);
> +	scratch = alloc_page(GFP_KERNEL);
> +	if (scratch == NULL)
> +		return -ENOMEM;
>  
> +	xdr_init_decode(&stream, &buf, NULL);
> +	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
>  
>  	do {
>  		status = xdr_decode(desc, entry, &stream);
> @@ -506,6 +506,8 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
>  		} else
>  			status = PTR_ERR(array);
>  	}
> +
> +	put_page(scratch);
>  	return status;
>  }
>  
> @@ -521,7 +523,6 @@ static
>  void nfs_readdir_free_large_page(void *ptr, struct page **pages,
>  		unsigned int npages)
>  {
> -	vm_unmap_ram(ptr, npages);
>  	nfs_readdir_free_pagearray(pages, npages);
>  }
>  
> @@ -530,9 +531,8 @@ void nfs_readdir_free_large_page(void *ptr, struct page **pages,
>   * to nfs_readdir_free_large_page
>   */
>  static
> -void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
> +int nfs_readdir_large_page(struct page **pages, unsigned int npages)
>  {
> -	void *ptr;
>  	unsigned int i;
>  
>  	for (i = 0; i < npages; i++) {
> @@ -541,13 +541,11 @@ void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
>  			goto out_freepages;
>  		pages[i] = page;
>  	}
> +	return 0;
>  
> -	ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
> -	if (!IS_ERR_OR_NULL(ptr))
> -		return ptr;
>  out_freepages:
>  	nfs_readdir_free_pagearray(pages, i);
> -	return NULL;
> +	return -ENOMEM;
>  }
>  
>  static
> @@ -577,8 +575,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
>  	memset(array, 0, sizeof(struct nfs_cache_array));
>  	array->eof_index = -1;
>  
> -	pages_ptr = nfs_readdir_large_page(pages, array_size);
> -	if (!pages_ptr)
> +	status = nfs_readdir_large_page(pages, array_size);
> +	if (status < 0)
>  		goto out_release_array;
>  	do {
>  		unsigned int pglen;
> @@ -587,7 +585,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
>  		if (status < 0)
>  			break;
>  		pglen = status;
> -		status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen);
> +		status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
>  		if (status < 0) {
>  			if (status == -ENOSPC)
>  				status = 0;
> diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
> index 5914a19..b382a1b 100644
> --- a/fs/nfs/nfs2xdr.c
> +++ b/fs/nfs/nfs2xdr.c
> @@ -487,12 +487,6 @@ nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_se
>  
>  	entry->d_type = DT_UNKNOWN;
>  
> -	p = xdr_inline_peek(xdr, 8);
> -	if (p != NULL)
> -		entry->eof = !p[0] && p[1];
> -	else
> -		entry->eof = 0;
> -
>  	return p;
>  
>  out_overflow:
> diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
> index f6cc60f..ba91236 100644
> --- a/fs/nfs/nfs3xdr.c
> +++ b/fs/nfs/nfs3xdr.c
> @@ -647,12 +647,6 @@ nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_s
>  			memset((u8*)(entry->fh), 0, sizeof(*entry->fh));
>  	}
>  
> -	p = xdr_inline_peek(xdr, 8);
> -	if (p != NULL)
> -		entry->eof = !p[0] && p[1];
> -	else
> -		entry->eof = 0;
> -
>  	return p;
>  
>  out_overflow:
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index 9f1826b..0662a98 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
> @@ -6215,12 +6215,6 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
>  	if (verify_attr_len(xdr, p, len) < 0)
>  		goto out_overflow;
>  
> -	p = xdr_inline_peek(xdr, 8);
> -	if (p != NULL)
> -		entry->eof = !p[0] && p[1];
> -	else
> -		entry->eof = 0;
> -
>  	return p;
>  
>  out_overflow:
> diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
> index 498ab93..7783c68 100644
> --- a/include/linux/sunrpc/xdr.h
> +++ b/include/linux/sunrpc/xdr.h
> @@ -201,6 +201,8 @@ struct xdr_stream {
>  
>  	__be32 *end;		/* end of available buffer space */
>  	struct kvec *iov;	/* pointer to the current kvec */
> +	struct kvec scratch;	/* Scratch buffer */
> +	struct page **page_ptr;	/* pointer to the current page */
>  };
>  
>  extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
> @@ -208,7 +210,7 @@ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
>  extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
>  		unsigned int base, unsigned int len);
>  extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
> -extern __be32 *xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes);
> +extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen);
>  extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
>  extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
>  extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
> index cd9e841..679cd67 100644
> --- a/net/sunrpc/xdr.c
> +++ b/net/sunrpc/xdr.c
> @@ -552,6 +552,74 @@ void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int b
>  }
>  EXPORT_SYMBOL_GPL(xdr_write_pages);
>  
> +static void xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov,
> +		__be32 *p, unsigned int len)
> +{
> +	if (len > iov->iov_len)
> +		len = iov->iov_len;
> +	if (p == NULL)
> +		p = (__be32*)iov->iov_base;
> +	xdr->p = p;
> +	xdr->end = (__be32*)(iov->iov_base + len);
> +	xdr->iov = iov;
> +	xdr->page_ptr = NULL;
> +}
> +
> +static int xdr_set_page_base(struct xdr_stream *xdr,
> +		unsigned int base, unsigned int len)
> +{
> +	unsigned int pgnr;
> +	unsigned int maxlen;
> +	unsigned int pgoff;
> +	unsigned int pgend;
> +	void *kaddr;
> +
> +	maxlen = xdr->buf->page_len;
> +	if (base >= maxlen)
> +		return -EINVAL;
> +	maxlen -= base;
> +	if (len > maxlen)
> +		len = maxlen;
> +
> +	base += xdr->buf->page_base;
> +
> +	pgnr = base >> PAGE_SHIFT;
> +	xdr->page_ptr = &xdr->buf->pages[pgnr];
> +	kaddr = page_address(*xdr->page_ptr);
> +
> +	pgoff = base & ~PAGE_MASK;
> +	xdr->p = (__be32*)(kaddr + pgoff);
> +
> +	pgend = pgoff + len;
> +	if (pgend > PAGE_SIZE)
> +		pgend = PAGE_SIZE;
> +	xdr->end = (__be32*)(kaddr + pgend);
> +	xdr->iov = NULL;
> +	return 0;
> +}
> +
> +static void xdr_set_next_page(struct xdr_stream *xdr)
> +{
> +	unsigned int newbase;
> +
> +	newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT;
> +	newbase -= xdr->buf->page_base;
> +
> +	if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0)
> +		xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
> +}
> +
> +static bool xdr_set_next_buffer(struct xdr_stream *xdr)
> +{
> +	if (xdr->page_ptr != NULL)
> +		xdr_set_next_page(xdr);
> +	else if (xdr->iov == xdr->buf->head) {
> +		if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0)
> +			xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
> +	}
> +	return xdr->p != xdr->end;
> +}
> +
>  /**
>   * xdr_init_decode - Initialize an xdr_stream for decoding data.
>   * @xdr: pointer to xdr_stream struct
> @@ -560,41 +628,67 @@ EXPORT_SYMBOL_GPL(xdr_write_pages);
>   */
>  void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
>  {
> -	struct kvec *iov = buf->head;
> -	unsigned int len = iov->iov_len;
> -
> -	if (len > buf->len)
> -		len = buf->len;
>  	xdr->buf = buf;
> -	xdr->iov = iov;
> -	xdr->p = p;
> -	xdr->end = (__be32 *)((char *)iov->iov_base + len);
> +	xdr->scratch.iov_base = NULL;
> +	xdr->scratch.iov_len = 0;
> +	if (buf->head[0].iov_len != 0)
> +		xdr_set_iov(xdr, buf->head, p, buf->len);
> +	else if (buf->page_len != 0)
> +		xdr_set_page_base(xdr, 0, buf->len);
>  }
>  EXPORT_SYMBOL_GPL(xdr_init_decode);
>  
> -/**
> - * xdr_inline_peek - Allow read-ahead in the XDR data stream
> - * @xdr: pointer to xdr_stream struct
> - * @nbytes: number of bytes of data to decode
> - *
> - * Check if the input buffer is long enough to enable us to decode
> - * 'nbytes' more bytes of data starting at the current position.
> - * If so return the current pointer without updating the current
> - * pointer position.
> - */
> -__be32 * xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes)
> +static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
>  {
>  	__be32 *p = xdr->p;
>  	__be32 *q = p + XDR_QUADLEN(nbytes);
>  
>  	if (unlikely(q > xdr->end || q < p))
>  		return NULL;
> +	xdr->p = q;
>  	return p;
>  }
> -EXPORT_SYMBOL_GPL(xdr_inline_peek);
>  
>  /**
> - * xdr_inline_decode - Retrieve non-page XDR data to decode
> + * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data.
> + * @xdr: pointer to xdr_stream struct
> + * @buf: pointer to an empty buffer
> + * @buflen: size of 'buf'
> + *
> + * The scratch buffer is used when decoding from an array of pages.
> + * If an xdr_inline_decode() call spans across page boundaries, then
> + * we copy the data into the scratch buffer in order to allow linear
> + * access.
> + */
> +void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen)
> +{
> +	xdr->scratch.iov_base = buf;
> +	xdr->scratch.iov_len = buflen;
> +}
> +EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer);
> +
> +static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes)
> +{
> +	__be32 *p;
> +	void *cpdest = xdr->scratch.iov_base;
> +	size_t cplen = (char *)xdr->end - (char *)xdr->p;
> +
> +	if (nbytes > xdr->scratch.iov_len)
> +		return NULL;
> +	memcpy(cpdest, xdr->p, cplen);
> +	cpdest += cplen;
> +	nbytes -= cplen;
> +	if (!xdr_set_next_buffer(xdr))
> +		return NULL;
> +	p = __xdr_inline_decode(xdr, nbytes);
> +	if (p == NULL)
> +		return NULL;
> +	memcpy(cpdest, p, nbytes);
> +	return xdr->scratch.iov_base;
> +}
> +
> +/**
> + * xdr_inline_decode - Retrieve XDR data to decode
>   * @xdr: pointer to xdr_stream struct
>   * @nbytes: number of bytes of data to decode
>   *
> @@ -605,13 +699,16 @@ EXPORT_SYMBOL_GPL(xdr_inline_peek);
>   */
>  __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
>  {
> -	__be32 *p = xdr->p;
> -	__be32 *q = p + XDR_QUADLEN(nbytes);
> +	__be32 *p;
>  
> -	if (unlikely(q > xdr->end || q < p))
> +	if (nbytes == 0)
> +		return xdr->p;
> +	if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr))
>  		return NULL;
> -	xdr->p = q;
> -	return p;
> +	p = __xdr_inline_decode(xdr, nbytes);
> +	if (p != NULL)
> +		return p;
> +	return xdr_copy_to_scratch(xdr, nbytes);
>  }
>  EXPORT_SYMBOL_GPL(xdr_inline_decode);
>  
> @@ -671,16 +768,12 @@ EXPORT_SYMBOL_GPL(xdr_read_pages);
>   */
>  void xdr_enter_page(struct xdr_stream *xdr, unsigned int len)
>  {
> -	char * kaddr = page_address(xdr->buf->pages[0]);
>  	xdr_read_pages(xdr, len);
>  	/*
>  	 * Position current pointer at beginning of tail, and
>  	 * set remaining message length.
>  	 */
> -	if (len > PAGE_CACHE_SIZE - xdr->buf->page_base)
> -		len = PAGE_CACHE_SIZE - xdr->buf->page_base;
> -	xdr->p = (__be32 *)(kaddr + xdr->buf->page_base);
> -	xdr->end = (__be32 *)((char *)xdr->p + len);
> +	xdr_set_page_base(xdr, 0, len);
>  }
>  EXPORT_SYMBOL_GPL(xdr_enter_page);
>  
> -- 
> 1.7.3.4
> 
> 
> 
> -- 
> Trond Myklebust
> Linux NFS client maintainer
> 
> NetApp
> Trond.Myklebust@xxxxxxxxxx
> www.netapp.com
> 
> 

-- 
Pengutronix e.K.                           | Uwe Kleine-König            |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux