Re: [PATCH 03/10] SUNRPC: Provide functions for managing universal addresses

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 2009-07-15 at 17:37 -0400, Chuck Lever wrote:
> Introduce a set of functions in the kernel's RPC implementation for
> converting between a socket address and either a standard
> presentation address string or an RPC universal address.
> 
> The universal address functions will be used to encode and decode
> RPCB_FOO and NFSv4 SETCLIENTID arguments.  The other functions are
> part of a previous promise to deliver shared functions that can be
> used by upper-layer protocols to display and manipulate IP
> addresses.
> 
> The kernel's current address printf formatters were designed
> specifically for kernel to user-space APIs that require a particular
> string format for socket addresses, thus are somewhat limited for the
> purposes of sunrpc.ko.  The formatter for IPv6 addresses, %pI6, does
> not support short-handing or scope IDs.  Also, these printf formatters
> are unique per address family, so a separate formatter string is
> required for printing AF_INET and AF_INET6 addresses.
> 
> Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx>
> ---
> 
>  fs/nfs/internal.h           |    2 
>  include/linux/sunrpc/clnt.h |   12 +
>  net/sunrpc/Makefile         |    2 
>  net/sunrpc/addr.c           |  524 +++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 537 insertions(+), 3 deletions(-)
>  create mode 100644 net/sunrpc/addr.c
> 
> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> index d3823d7..a19b7b9 100644
> --- a/fs/nfs/internal.h
> +++ b/fs/nfs/internal.h
> @@ -370,8 +370,6 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
>  		PAGE_SIZE - 1) >> PAGE_SHIFT;
>  }
>  
> -#define IPV6_SCOPE_DELIMITER	'%'
> -
>  /*
>   * Set the port number in an address.  Be agnostic about the address
>   * family.
> diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
> index 37881f1..bf38c4a 100644
> --- a/include/linux/sunrpc/clnt.h
> +++ b/include/linux/sunrpc/clnt.h
> @@ -151,5 +151,17 @@ void		rpc_force_rebind(struct rpc_clnt *);
>  size_t		rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
>  const char	*rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
>  
> +unsigned short	rpc_get_port(const struct sockaddr *sap);
> +void		rpc_set_port(struct sockaddr *sap, const unsigned short port);
> +size_t		rpc_ntop(const struct sockaddr *, char *, const size_t);
> +size_t		rpc_pton(const char *, const size_t,
> +			 struct sockaddr *, const size_t);
> +char *		rpc_sockaddr2uaddr(const struct sockaddr *);
> +size_t		rpc_uaddr2sockaddr(const char *, const size_t,
> +				   struct sockaddr *, const size_t);
> +
> +#define IPV6_SCOPE_DELIMITER		'%'
> +#define IPV6_SCOPE_ID_LEN		sizeof("%nnnnnnnnnn")
> +
>  #endif /* __KERNEL__ */
>  #endif /* _LINUX_SUNRPC_CLNT_H */
> diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
> index 7a594cc..6020f5b 100644
> --- a/net/sunrpc/Makefile
> +++ b/net/sunrpc/Makefile
> @@ -12,7 +12,7 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
>  sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
>  	    auth.o auth_null.o auth_unix.o auth_generic.o \
>  	    svc.o svcsock.o svcauth.o svcauth_unix.o \
> -	    rpcb_clnt.o timer.o xdr.o \
> +	    addr.o rpcb_clnt.o timer.o xdr.o \
>  	    sunrpc_syms.o cache.o rpc_pipe.o \
>  	    svc_xprt.o
>  sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o
> diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
> new file mode 100644
> index 0000000..9ddcffd
> --- /dev/null
> +++ b/net/sunrpc/addr.c
> @@ -0,0 +1,524 @@
> +/*
> + * Copyright 2009, Oracle.  All rights reserved.
> + *
> + * Convert socket addresses to presentation addresses and universal
> + * addresses, and vice versa.
> + *
> + * Universal addresses are introduced by RFC 1833 and further refined by
> + * recent RFCs describing NFSv4.  The universal address format is part
> + * of the external (network) interface provided by rpcbind version 3
> + * and 4, and by NFSv4.  Such an address is a string containing a
> + * presentation format IP address followed by a port number in
> + * "hibyte.lobyte" format.
> + *
> + * There are some challenges to constructing universal addresses.
> + * In particular, constructing a presentation address string for IPv4
> + * is straightforward, but it's not so easy for IPv6 addresses.  Such
> + * addresses can be "short-handed" to remove the longest string of
> + * zeros, and mapped v4 presentation addresses can appear in a special
> + * dotted-quad form.  Refer to RFC 4291, Section 2.2 for details on
> + * IPv6 presentation formats.
> + *
> + * IPv6 addresses can also include a scope ID, typically denoted by
> + * a '%' followed by a device name or a non-negative integer.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/types.h>
> +#include <linux/socket.h>
> +#include <linux/in.h>
> +#include <linux/in6.h>
> +#include <linux/inet.h>
> +
> +#include <net/ipv6.h>
> +
> +#include <linux/sunrpc/clnt.h>
> +
> +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
> +
> +struct rpc_zeros {
> +	unsigned int	count;
> +	int		index;
> +};
> +
> +static const struct rpc_zeros zeros_init = {
> +	.count		= 0,
> +	.index		= -1,
> +};
> +
> +static void rpc_find_longest_zero_run(const struct in6_addr *addr,
> +				      struct rpc_zeros *longest)
> +{
> +	struct rpc_zeros zeros;
> +	unsigned int i;
> +
> +	zeros = zeros_init;
> +	*longest = zeros_init;
                     ^^^^^^^^^^ Why do this when you could just set
longest->index to -1?

> +
> +	for (i = 0; i < ARRAY_SIZE(addr->s6_addr16); i++) {
> +		if (addr->s6_addr16[i] == 0) {
> +			if (zeros.index == -1)
> +				zeros.index = i;
> +			zeros.count++;
> +		} else {
> +			if (zeros.count > longest->count ||
> +			    longest->index == -1)
> +				*longest = zeros;
> +			zeros = zeros_init;
> +		}
> +	}
> +
> +	/*
> +	 * Maybe the last run of zeros didn't terminate.
> +	 * Not likely for a host address.
> +	 */
> +	if (zeros.index != -1) {
> +		if (zeros.count > longest->count ||
> +		    longest->index == -1)
> +			*longest = zeros;
> +	}
> +}
> +
> +/*
> + * Append a set of halfwords from @addr onto the string in @buf.
> + *
> + * Returns the new length of the string in @buf, or zero if
> + * some error occurs.
> + */
> +static size_t rpc_append_halfword6(const struct in6_addr *addr,
> +				   const char *fmt,
> +				   const unsigned int start,
> +				   const unsigned int end,
> +				   char *buf, const size_t buflen)
> +{
> +	char tmp[sizeof("ffff:") + 1];

You are passing 'fmt' as an argument, yet you assume it will have a
format that makes sense of the above 'sizeof' calculation? Why not just
drop the ':', and always use fmt=="%x"?

As far as I can see, that would simplify rpc_ntop6_shorthand too...

> +	unsigned int i;
> +	size_t len = 0;
> +
> +	for (i = start; i < end; i++) {
> +		len = snprintf(tmp, sizeof(tmp), fmt,
> +					ntohs(addr->s6_addr16[i]));
> +		if (unlikely(len > sizeof(tmp)))
> +			return 0;
> +
> +		len = strlcat(buf, tmp, buflen);
> +		if (unlikely(len > buflen))
> +			return 0;
> +	}
> +
> +	return len;
> +}
> +
> +/*
> + * This implementation depends on the Linux kernel's definition of
> + * in6_addr, which is a union of arrays that facilities dissecting
> + * the address into 16-bit chunks.
> + */
> +static size_t rpc_ntop6_shorthand(const struct in6_addr *addr,
> +				  struct rpc_zeros *longest,
> +				  char *buf, const size_t buflen)
> +{
> +	size_t len;
> +
> +	/*
> +	 * Left half, before the longest string of zeros
> +	 */
> +	buf[0] = '\0';
> +	if (longest->index == 0) {
> +		len = strlcat(buf, ":", buflen);
> +		if (unlikely(len > buflen))
> +			return 0;
> +	} else {
> +		len = rpc_append_halfword6(addr, "%x:",
> +					0, longest->index, buf, buflen);
> +		if (unlikely(len == 0))
> +			return 0;
> +	}
> +
> +	/*
> +	 * Right half, after the longest string of zeros
> +	 */
> +	if (longest->index + longest->count >= ARRAY_SIZE(addr->s6_addr16)) {
> +		len = strlcat(buf, ":", buflen);
> +		if (unlikely(len > buflen))
> +			return 0;
> +	} else {
> +		len = rpc_append_halfword6(addr, ":%x",
> +			longest->index + longest->count,
> +			ARRAY_SIZE(addr->s6_addr16), buf, buflen);
> +		if (unlikely(len == 0))
> +			return 0;
> +	}
> +
> +	return len;
> +}

The shorthand format is, AFAICS, not an obligatory notation. Why should
we implement it, if it involves all this complication?

> +
> +static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap,
> +				  char *buf, const int buflen)
> +{
> +	const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
> +	const struct in6_addr *addr = &sin6->sin6_addr;
> +	struct rpc_zeros longest;
> +
> +	/*
> +	 * RFC 4291, Section 2.2.3
> +	 *
> +	 * Special presentation address format for mapped v4
> +	 * addresses.
> +	 */
> +	if (ipv6_addr_v4mapped(addr))
> +		return snprintf(buf, buflen, "::ffff:%pI4",
> +					&addr->s6_addr32[3]);

Err... Won't that give you ::ffff:x.x.x.x ?

> +	/*
> +	 * RFC 4291, Section 2.2.2
> +	 *
> +	 * See if address can be short-handed.
> +	 */
> +	rpc_find_longest_zero_run(addr, &longest);
> +	if (longest.index != -1 && longest.count > 1)
> +		return rpc_ntop6_shorthand(addr, &longest, buf, buflen);
> +
> +	/*
> +	 * RFC 4291, Section 2.2.1
> +	 *
> +	 * Short-handing not possible, so spell out full
> +	 * address.  We don't want leading zeros in each
> +	 * halfword, so avoid %pI6.
> +	 */
> +	return snprintf(buf, buflen, "%x:%x:%x:%x:%x:%x:%x:%x",
> +		ntohs(addr->s6_addr16[0]), ntohs(addr->s6_addr16[1]),
> +		ntohs(addr->s6_addr16[2]), ntohs(addr->s6_addr16[3]),
> +		ntohs(addr->s6_addr16[4]), ntohs(addr->s6_addr16[5]),
> +		ntohs(addr->s6_addr16[6]), ntohs(addr->s6_addr16[7]));

Why not use %pI6 ?

> +}
> +
> +static size_t rpc_ntop6(const struct sockaddr *sap,
> +			char *buf, const size_t buflen)
> +{
> +	const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
> +	char scopebuf[IPV6_SCOPE_ID_LEN];
> +	size_t len;
> +	int rc;
> +
> +	len = rpc_ntop6_noscopeid(sap, buf, buflen);
> +	if (unlikely(len == 0))
> +		return len;
> +
> +	if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
> +	    !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL))
> +		return len;
> +
> +	rc = snprintf(scopebuf, sizeof(scopebuf), "%c%u",
> +			IPV6_SCOPE_DELIMITER, sin6->sin6_scope_id);
> +	if (unlikely((size_t)rc > sizeof(scopebuf)))
> +		return 0;
> +
> +	len += rc;
> +	if (unlikely(len > buflen))
> +		return 0;
> +
> +	strcat(buf, scopebuf);
> +	return len;
> +}
> +
> +#else	/* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */
> +
> +static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap,
> +				  char *buf, const int buflen)
> +{
> +	return 0;
> +}
> +
> +static size_t rpc_ntop6(const struct sockaddr *sap,
> +			char *buf, const size_t buflen)
> +{
> +	return 0;
> +}
> +
> +#endif	/* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */
> +
> +static int rpc_ntop4(const struct sockaddr *sap,
> +		     char *buf, const size_t buflen)
> +{
> +	const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
> +
> +	return snprintf(buf, buflen, "%pI4", &sin->sin_addr);
> +}
> +
> +/**
> + * rpc_ntop - construct a presentation address in @buf
> + * @sap: socket address
> + * @buf: construction area
> + * @buflen: size of @buf, in bytes
> + *
> + * Plants a %NUL-terminated string in @buf and returns the length
> + * of the string, excluding the %NUL.  Otherwise zero is returned.
> + */
> +size_t rpc_ntop(const struct sockaddr *sap, char *buf, const size_t buflen)
> +{
> +	char tmp[INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 1];
> +	size_t len;
> +
> +	switch (sap->sa_family) {
> +	case AF_INET:
> +		len = rpc_ntop4(sap, tmp, sizeof(tmp));
> +		break;
> +	case AF_INET6:
> +		len = rpc_ntop6(sap, tmp, sizeof(tmp));
> +		break;
> +	default:
> +		return 0;
> +	}
> +
> +	if (unlikely(len == 0 || len > sizeof(tmp) || len > buflen))
> +		return 0;
> +	return strlcpy(buf, tmp, buflen);

Why two copies?


> +/**
> + * rpc_sockaddr2uaddr - Construct a universal address string from
> @sap.
> + * @sap: socket address
> + *
> + * Returns a %NUL-terminated string in dynamically allocated memory;
> + * otherwise NULL is returned if an error occurred.  Caller must
> + * free the returned string.
> + */
> +char *rpc_sockaddr2uaddr(const struct sockaddr *sap)
> +{
> +       char portbuf[RPCBIND_MAXUADDRPLEN + 1];
> +       char addrbuf[RPCBIND_MAXUADDRLEN + 1];
> +       unsigned short port;
> +
> +       switch (sap->sa_family) {
> +       case AF_INET:
> +               if (rpc_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0)
> +                       return NULL;
> +               port = ntohs(((struct sockaddr_in *)sap)->sin_port);
> +               break;
> +       case AF_INET6:
> +               if (rpc_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf))
> == 0)
> +                       return NULL;
> +               port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
> +               break;
> +       default:
> +               return NULL;
> +       }
> +
> +       if (snprintf(portbuf, sizeof(portbuf),
> +                    ".%u.%u", port >> 8, port & 0xff) >
> (int)sizeof(portbuf))
> +               return NULL;
> +
> +       if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) >
> sizeof(addrbuf))
> +               return NULL;
> +
> +       return kstrdup(addrbuf, GFP_ATOMIC);

GFP_ATOMIC? Why?

> +}
> +EXPORT_SYMBOL_GPL(rpc_sockaddr2uaddr);

> +unsigned short rpc_get_port(const struct sockaddr *sap)
> +{
> +	switch (sap->sa_family) {
> +	case AF_INET:
> +		return ntohs(((struct sockaddr_in *)sap)->sin_port);
> +	case AF_INET6:
> +		return ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
> +	}
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(rpc_get_port);

inline helper?

> +
> +void rpc_set_port(struct sockaddr *sap, const unsigned short port)
> +{
> +	switch (sap->sa_family) {
> +	case AF_INET:
> +		((struct sockaddr_in *)sap)->sin_port = htons(port);
> +		break;
> +	case AF_INET6:
> +		((struct sockaddr_in6 *)sap)->sin6_port = htons(port);
> +		break;
> +	}
> +}
> +EXPORT_SYMBOL_GPL(rpc_set_port);

ditto

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux