On 10/29, Mina Almasry wrote: > Check we're going to free a reasonable number of frags in token_count > before starting the loop, to prevent looping too long. > > Also minor code cleanups: > - Flip checks to reduce indentation. > - Use sizeof(*tokens) everywhere for consistentcy. > > Cc: Yi Lai <yi1.lai@xxxxxxxxxxxxxxx> > > Signed-off-by: Mina Almasry <almasrymina@xxxxxxxxxx> > > --- > net/core/sock.c | 46 ++++++++++++++++++++++++++++------------------ > 1 file changed, 28 insertions(+), 18 deletions(-) > > diff --git a/net/core/sock.c b/net/core/sock.c > index 7f398bd07fb7..8603b8d87f2e 100644 > --- a/net/core/sock.c > +++ b/net/core/sock.c > @@ -1047,11 +1047,12 @@ static int sock_reserve_memory(struct sock *sk, int bytes) > > #ifdef CONFIG_PAGE_POOL > > -/* This is the number of tokens that the user can SO_DEVMEM_DONTNEED in > +/* This is the number of frags that the user can SO_DEVMEM_DONTNEED in > * 1 syscall. The limit exists to limit the amount of memory the kernel > - * allocates to copy these tokens. > + * allocates to copy these tokens, and to prevent looping over the frags for > + * too long. 
> */ > -#define MAX_DONTNEED_TOKENS 128 > +#define MAX_DONTNEED_FRAGS 1024 > > static noinline_for_stack int > sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen) > @@ -1059,43 +1060,52 @@ sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen) > unsigned int num_tokens, i, j, k, netmem_num = 0; > struct dmabuf_token *tokens; > netmem_ref netmems[16]; > + u64 num_frags = 0; > int ret = 0; > > if (!sk_is_tcp(sk)) > return -EBADF; > > - if (optlen % sizeof(struct dmabuf_token) || > - optlen > sizeof(*tokens) * MAX_DONTNEED_TOKENS) > + if (optlen % sizeof(*tokens) || > + optlen > sizeof(*tokens) * MAX_DONTNEED_FRAGS) > return -EINVAL; > > - tokens = kvmalloc_array(optlen, sizeof(*tokens), GFP_KERNEL); > + num_tokens = optlen / sizeof(*tokens); > + tokens = kvmalloc_array(num_tokens, sizeof(*tokens), GFP_KERNEL); > if (!tokens) > return -ENOMEM; > > - num_tokens = optlen / sizeof(struct dmabuf_token); > if (copy_from_sockptr(tokens, optval, optlen)) { > kvfree(tokens); > return -EFAULT; > } > > + for (i = 0; i < num_tokens; i++) { > + num_frags += tokens[i].token_count; > + if (num_frags > MAX_DONTNEED_FRAGS) { > + kvfree(tokens); > + return -E2BIG; > + } > + } > + > xa_lock_bh(&sk->sk_user_frags); > for (i = 0; i < num_tokens; i++) { > for (j = 0; j < tokens[i].token_count; j++) { > netmem_ref netmem = (__force netmem_ref)__xa_erase( > &sk->sk_user_frags, tokens[i].token_start + j); > > - if (netmem && > - !WARN_ON_ONCE(!netmem_is_net_iov(netmem))) { > - netmems[netmem_num++] = netmem; > - if (netmem_num == ARRAY_SIZE(netmems)) { > - xa_unlock_bh(&sk->sk_user_frags); > - for (k = 0; k < netmem_num; k++) > - WARN_ON_ONCE(!napi_pp_put_page(netmems[k])); > - netmem_num = 0; > - xa_lock_bh(&sk->sk_user_frags); > - } > - ret++; [..] > + if (!netmem || WARN_ON_ONCE(!netmem_is_net_iov(netmem))) > + continue; Any reason we are not returning explicit error to the callers here? 
That would probably require some mechanism to signal which particular token failed, so that users can retry the remaining ones?