On Sun, Oct 22 2023 at 20:46, Al Viro wrote:
> @@ -113,14 +113,14 @@ csum_partial_cfu_aligned(const unsigned long __user *src, unsigned long *dst,
>  		*dst = word | tmp;
>  		checksum += carry;
>  	}
> -	return checksum;
> +	return from64to16 (checksum);

  from64to16(checksum);

all over the place

> 
>  #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
>  #define _HAVE_ARCH_CSUM_AND_COPY
>  static inline
> -__wsum csum_and_copy_from_user(const void __user *src, void *dst, int len)
> +__wsum_fault csum_and_copy_from_user(const void __user *src, void *dst, int len)
>  {
>  	if (!access_ok(src, len))
> -		return 0;
> +		return -1;

  		return CSUM_FAULT;

>  	/*
> diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
> index 0fd5c10e90a7..5db935eaf165 100644
> --- a/arch/arm/lib/csumpartialcopygeneric.S
> +++ b/arch/arm/lib/csumpartialcopygeneric.S
> @@ -86,7 +86,7 @@ sum	.req	r3
> 
>  FN_ENTRY
>  		save_regs
> -		mov	sum, #-1
> +		mov	sum, #0
> 
>  		cmp	len, #8			@ Ensure that we have at least
>  		blo	.Lless8			@ 8 bytes to copy.
> @@ -160,6 +160,7 @@ FN_ENTRY
>  		ldr	sum, [sp, #0]		@ dst
>  		tst	sum, #1
>  		movne	r0, r0, ror #8
> +		mov	r1, #0
>  		load_regs
> 
>  .Lsrc_not_aligned:
> diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
> index 6928781e6bee..f273c9667914 100644
> --- a/arch/arm/lib/csumpartialcopyuser.S
> +++ b/arch/arm/lib/csumpartialcopyuser.S
> @@ -73,11 +73,11 @@
>  #include "csumpartialcopygeneric.S"
> 
>  /*
> - * We report fault by returning 0 csum - impossible in normal case, since
> - * we start with 0xffffffff for initial sum.
> + * We report fault by returning ~0ULL csum
>   */

There is also a stale comment a few lines further up.

>  	.pushsection .text.fixup,"ax"
>  	.align	4
> -9001:	mov	r0, #0
> +9001:	mov	r0, #-1
> +	mov	r1, #-1
>  	load_regs
>  	.popsection
>  #include <linux/errno.h>
> -#include <asm/types.h>
> +#include <linux/bitops.h>
>  #include <asm/byteorder.h>
> +
> +typedef u64 __bitwise __wsum_fault;

newline please.

> +static inline __wsum_fault to_wsum_fault(__wsum v)
> +{
> +#if defined(CONFIG_64BIT) || defined(__LITTLE_ENDIAN__)
> +	return (__force __wsum_fault)v;
> +#else
> +	return (__force __wsum_fault)((__force u64)v << 32);
> +#endif
> +}
> +
> +static inline __wsum_fault from_wsum_fault(__wsum v)
> +{
> +#if defined(CONFIG_64BIT) || defined(__LITTLE_ENDIAN__)
> +	return (__force __wsum)v;
> +#else
> +	return (__force __wsum)((__force u64)v >> 32);
> +#endif
> +}
> +
> +static inline bool wsum_fault_check(__wsum_fault v)
> +{
> +#if defined(CONFIG_64BIT) || defined(__LITTLE_ENDIAN__)
> +	return (__force s64)v < 0;
> +#else
> +	return (int)(__force u32)v < 0;

Why not __force s32 right away?
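I.e. something along these lines, untested and assuming CSUM_FAULT stays
the all-ones pattern from the patch:

static inline bool wsum_fault_check(__wsum_fault v)
{
#if defined(CONFIG_64BIT) || defined(__LITTLE_ENDIAN__)
	/* a valid (zero-extended) csum is never negative as s64; CSUM_FAULT is */
	return (__force s64)v < 0;
#else
	/* csum lives in the upper half here, so only CSUM_FAULT sets this bit */
	return (__force s32)v < 0;
#endif
}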
>  #include <asm/checksum.h>
>  #if !defined(_HAVE_ARCH_COPY_AND_CSUM_FROM_USER) || !defined(HAVE_CSUM_COPY_USER)
>  #include <linux/uaccess.h>
> @@ -25,24 +57,24 @@
> 
>  #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
>  static __always_inline
> -__wsum csum_and_copy_from_user (const void __user *src, void *dst,
> +__wsum_fault csum_and_copy_from_user (const void __user *src, void *dst,
>  				      int len)
>  {
>  	if (copy_from_user(dst, src, len))
> -		return 0;
> -	return csum_partial(dst, len, ~0U);
> +		return CSUM_FAULT;
> +	return to_wsum_fault(csum_partial(dst, len, 0));
>  }
>  #endif
>  #ifndef HAVE_CSUM_COPY_USER
> -static __always_inline __wsum csum_and_copy_to_user
> +static __always_inline __wsum_fault csum_and_copy_to_user
>  (const void *src, void __user *dst, int len)
>  {
> -	__wsum sum = csum_partial(src, len, ~0U);
> +	__wsum sum = csum_partial(src, len, 0);
> 
>  	if (copy_to_user(dst, src, len) == 0)
> -		return sum;
> -	return 0;
> +		return to_wsum_fault(sum);
> +	return CSUM_FAULT;

	if (copy_to_user(dst, src, len))
		return CSUM_FAULT;
	return to_wsum_fault(sum);

So it follows the pattern of csum_and_copy_from_user() above?

>  size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
>  			       struct iov_iter *i)
>  {
> -	__wsum sum, next;
> +	__wsum sum;
> +	__wsum_fault next;
>  	sum = *csum;
>  	if (WARN_ON_ONCE(!i->data_source))
>  		return 0;
> 
>  	iterate_and_advance(i, bytes, base, len, off, ({
>  		next = csum_and_copy_from_user(base, addr + off, len);
> -		sum = csum_block_add(sum, next, off);
> -		next ? 0 : len;
> +		sum = csum_block_add(sum, from_wsum_fault(next), off);
> +		likely(!wsum_fault_check(next)) ? 0 : len;

This macro maze is confusing as hell. Looking at iterate_buf(), which is
the least convoluted, that resolves to the following:

	len = bytes;
	...
	next = csum_and_copy_from_user(...);
	...
	len -= !wsum_fault_check(next) ? 0 : len;
	...
	bytes = len;
	...
	return bytes;

So it correctly returns 'bytes' for the success case and 0 for the
fault case.

Now decrypting iterate_iovec():

	off = 0;
	do {
		....
		len -= !wsum_fault_check(next) ? 0 : len;
		off += len;
		skip += len;
		bytes -= len;
		if (skip < __p->iov_len)	<- breaks on fault
			break;
		...
	} while (bytes);

	bytes = off;
	...
	return bytes;

Which means that if the first vector is successfully copied, then 'off'
is greater than 0. A fault on the second one will correctly break out of
the loop, but the function will incorrectly return a value > 0, i.e. the
length of the first iteration.

As the callers just check for != 0, such a partial copy is considered
success, no?

So instead of

	likely(!wsum_fault_check(next)) ? 0 : len;

shouldn't this just do:

	if (unlikely(wsum_fault_check(next)))
		return 0;
	len;

for the sake of simplicity and mental sanity?

Thanks,

        tglx
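P.S.: IOW, the user-copy step in csum_and_copy_from_iter() would then read
like this (untested sketch, only this step shown):

	next = csum_and_copy_from_user(base, addr + off, len);
	sum = csum_block_add(sum, from_wsum_fault(next), off);
	/* fault: return 0 right here instead of a partial length */
	if (unlikely(wsum_fault_check(next)))
		return 0;
	len;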