On Sun, Oct 22 2023 at 20:46, Al Viro wrote:
> @@ -113,14 +113,14 @@ csum_partial_cfu_aligned(const unsigned long __user *src, unsigned long *dst,
>  		*dst = word | tmp;
>  		checksum += carry;
>  	}
> -	return checksum;
> +	return from64to16 (checksum);

  from64to16(checksum);

all over the place

> 
>  #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
>  #define _HAVE_ARCH_CSUM_AND_COPY
>  static inline
> -__wsum csum_and_copy_from_user(const void __user *src, void *dst, int len)
> +__wsum_fault csum_and_copy_from_user(const void __user *src, void *dst, int len)
>  {
>  	if (!access_ok(src, len))
> -		return 0;
> +		return -1;

  		return CSUM_FAULT;

>  	/*
> diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
> index 0fd5c10e90a7..5db935eaf165 100644
> --- a/arch/arm/lib/csumpartialcopygeneric.S
> +++ b/arch/arm/lib/csumpartialcopygeneric.S
> @@ -86,7 +86,7 @@ sum	.req	r3
> 
>  FN_ENTRY
>  		save_regs
> -		mov	sum, #-1
> +		mov	sum, #0
> 
>  		cmp	len, #8			@ Ensure that we have at least
>  		blo	.Lless8			@ 8 bytes to copy.
> @@ -160,6 +160,7 @@ FN_ENTRY
>  		ldr	sum, [sp, #0]		@ dst
>  		tst	sum, #1
>  		movne	r0, r0, ror #8
> +		mov	r1, #0
>  		load_regs
> 
>  .Lsrc_not_aligned:
> diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
> index 6928781e6bee..f273c9667914 100644
> --- a/arch/arm/lib/csumpartialcopyuser.S
> +++ b/arch/arm/lib/csumpartialcopyuser.S
> @@ -73,11 +73,11 @@
>  #include "csumpartialcopygeneric.S"
> 
>  /*
> - * We report fault by returning 0 csum - impossible in normal case, since
> - * we start with 0xffffffff for initial sum.
> + * We report fault by returning ~0ULL csum
>   */

There is also a stale comment a few lines further up.

>  	.pushsection .text.fixup,"ax"
>  	.align	4
> -9001:	mov	r0, #0
> +9001:	mov	r0, #-1
> +	mov	r1, #-1
>  	load_regs
>  	.popsection
>  #include <linux/errno.h>
> -#include <asm/types.h>
> +#include <linux/bitops.h>
>  #include <asm/byteorder.h>
> +
> +typedef u64 __bitwise __wsum_fault;

newline please.

> +static inline __wsum_fault to_wsum_fault(__wsum v)
> +{
> +#if defined(CONFIG_64BIT) || defined(__LITTLE_ENDIAN__)
> +	return (__force __wsum_fault)v;
> +#else
> +	return (__force __wsum_fault)((__force u64)v << 32);
> +#endif
> +}
> +
> +static inline __wsum_fault from_wsum_fault(__wsum v)
> +{
> +#if defined(CONFIG_64BIT) || defined(__LITTLE_ENDIAN__)
> +	return (__force __wsum)v;
> +#else
> +	return (__force __wsum)((__force u64)v >> 32);
> +#endif
> +}
> +
> +static inline bool wsum_fault_check(__wsum_fault v)
> +{
> +#if defined(CONFIG_64BIT) || defined(__LITTLE_ENDIAN__)
> +	return (__force s64)v < 0;
> +#else
> +	return (int)(__force u32)v < 0;

Why not __force s32 right away?
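I.e. something along these lines, untested and assuming CSUM_FAULT stays
the all-ones pattern from the patch:

static inline bool wsum_fault_check(__wsum_fault v)
{
#if defined(CONFIG_64BIT) || defined(__LITTLE_ENDIAN__)
	/* a valid (zero-extended) csum is never negative as s64; CSUM_FAULT is */
	return (__force s64)v < 0;
#else
	/* csum lives in the upper half here, so only CSUM_FAULT sets this bit */
	return (__force s32)v < 0;
#endif
}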
>  #include <asm/checksum.h>
>  #if !defined(_HAVE_ARCH_COPY_AND_CSUM_FROM_USER) || !defined(HAVE_CSUM_COPY_USER)
>  #include <linux/uaccess.h>
> @@ -25,24 +57,24 @@
> 
>  #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
>  static __always_inline
> -__wsum csum_and_copy_from_user (const void __user *src, void *dst,
> +__wsum_fault csum_and_copy_from_user (const void __user *src, void *dst,
>  				      int len)
>  {
>  	if (copy_from_user(dst, src, len))
> -		return 0;
> -	return csum_partial(dst, len, ~0U);
> +		return CSUM_FAULT;
> +	return to_wsum_fault(csum_partial(dst, len, 0));
>  }
>  #endif
>  #ifndef HAVE_CSUM_COPY_USER
> -static __always_inline __wsum csum_and_copy_to_user
> +static __always_inline __wsum_fault csum_and_copy_to_user
>  (const void *src, void __user *dst, int len)
>  {
> -	__wsum sum = csum_partial(src, len, ~0U);
> +	__wsum sum = csum_partial(src, len, 0);
> 
>  	if (copy_to_user(dst, src, len) == 0)
> -		return sum;
> -	return 0;
> +		return to_wsum_fault(sum);
> +	return CSUM_FAULT;

	if (copy_to_user(dst, src, len))
		return CSUM_FAULT;
	return to_wsum_fault(sum);

So it follows the pattern of csum_and_copy_from_user() above?

>  size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
>  			       struct iov_iter *i)
>  {
> -	__wsum sum, next;
> +	__wsum sum;
> +	__wsum_fault next;
>  	sum = *csum;
>  	if (WARN_ON_ONCE(!i->data_source))
>  		return 0;
> 
>  	iterate_and_advance(i, bytes, base, len, off, ({
>  		next = csum_and_copy_from_user(base, addr + off, len);
> -		sum = csum_block_add(sum, next, off);
> -		next ? 0 : len;
> +		sum = csum_block_add(sum, from_wsum_fault(next), off);
> +		likely(!wsum_fault_check(next)) ? 0 : len;

This macro maze is confusing as hell. Looking at iterate_buf(), which is
the least convoluted, that resolves to the following:

	len = bytes;
	...
	next = csum_and_copy_from_user(...);
	...
	len -= !wsum_fault_check(next) ? 0 : len;
	...
	bytes = len;
	...
	return bytes;

So it correctly returns 'bytes' for the success case and 0 for the
fault case.

Now decrypting iterate_iovec():

	off = 0;
	do {
		....
		len -= !wsum_fault_check(next) ? 0 : len;
		off += len;
		skip += len;
		bytes -= len;
		if (skip < __p->iov_len)	<- breaks on fault
			break;
		...
	} while (bytes);

	bytes = off;
	...
	return bytes;

Which means that if the first vector is successfully copied, then 'off'
is greater than 0. A fault on the second one will correctly break out of
the loop, but the function will incorrectly return a value > 0, i.e. the
length of the first iteration.

As the callers just check for != 0, such a partial copy is considered
success, no?

So instead of

	likely(!wsum_fault_check(next)) ? 0 : len;

shouldn't this just do:

	if (unlikely(wsum_fault_check(next)))
		return 0;
	len;

for the sake of simplicity and mental sanity?

Thanks,

        tglx
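P.S.: IOW, the user-copy step in csum_and_copy_from_iter() would then read
like this (untested sketch, only this step shown):

	next = csum_and_copy_from_user(base, addr + off, len);
	sum = csum_block_add(sum, from_wsum_fault(next), off);
	/* fault: return 0 right here instead of a partial length */
	if (unlikely(wsum_fault_check(next)))
		return 0;
	len;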