Re: [PATCH V2 2/2] csky: atomic: Add custom atomic.h implementation

Guo Ren <guoren@xxxxxxxxxx> · Tue, 12 Apr 2022 13:21:10 +0800



On Mon, Apr 11, 2022 at 10:52 PM <guoren@xxxxxxxxxx> wrote:
>
> From: Guo Ren <guoren@xxxxxxxxxxxxxxxxx>
>
> The generic atomic.h uses cmpxchg to implement the atomic
> operations, which results in a dual loop and weakens the forward
> progress guarantee. This patch implements csky custom atomic
> operations with ldex/stex instructions for the best performance.
>
> Important reference comment by Rutland:
> 8e86f0b409a4 ("arm64: atomics: fix use of acquire + release for
> full barrier semantics")
>
> Link: https://lore.kernel.org/linux-riscv/CAJF2gTSAxpAi=LbAdu7jntZRUa=-dJwL0VfmDfBV5MHB=rcZ-w@xxxxxxxxxxxxxx/T/#m27a0f1342995deae49ce1d0e1f2683f8a181d6c3
> Signed-off-by: Guo Ren <guoren@xxxxxxxxxxxxxxxxx>
> Signed-off-by: Guo Ren <guoren@xxxxxxxxxx>
> Cc: Mark Rutland <mark.rutland@xxxxxxx>
> ---
> Changes in V2:
>  - Fix up the use of acquire + release for barrier semantics, as pointed out by Rutland.
> ---
>  arch/csky/include/asm/atomic.h | 130 +++++++++++++++++++++++++++++++++
>  1 file changed, 130 insertions(+)
>  create mode 100644 arch/csky/include/asm/atomic.h
>
> diff --git a/arch/csky/include/asm/atomic.h b/arch/csky/include/asm/atomic.h
> new file mode 100644
> index 000000000000..2e1a22f55ea1
> --- /dev/null
> +++ b/arch/csky/include/asm/atomic.h
> @@ -0,0 +1,130 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef __ASM_CSKY_ATOMIC_H
> +#define __ASM_CSKY_ATOMIC_H
> +
> +#ifdef CONFIG_SMP
> +# include <asm-generic/atomic64.h>
> +
> +#include <asm/cmpxchg.h>
> +#include <asm/barrier.h>
> +
> +#define __atomic_acquire_fence()       __smp_acquire_fence()
> +
> +#define __atomic_release_fence()       __smp_release_fence()
> +
> +static __always_inline int arch_atomic_read(const atomic_t *v)
> +{
> +       return READ_ONCE(v->counter);
> +}
> +static __always_inline void arch_atomic_set(atomic_t *v, int i)
> +{
> +       WRITE_ONCE(v->counter, i);
> +}
> +
> +#define ATOMIC_OP(op, asm_op, I)                                       \
> +static __always_inline                                                 \
> +void arch_atomic_##op(int i, atomic_t *v)                              \
> +{                                                                      \
> +       unsigned long tmp;                                              \
> +       __asm__ __volatile__ (                                          \
> +       "1:     ldex.w          %0, (%2)        \n"                     \
> +       "       " #op "         %0, %1          \n"                     \
> +       "       stex.w          %0, (%2)        \n"                     \
> +       "       bez             %0, 1b          \n"                     \
> +       : "=&r" (tmp)                                                   \
> +       : "r" (I), "r" (&v->counter)                                    \
> +       : "memory");                                                    \
> +}
> +
> +ATOMIC_OP(add, add,  i)
> +ATOMIC_OP(sub, add, -i)
> +ATOMIC_OP(and, and,  i)
> +ATOMIC_OP( or,  or,  i)
> +ATOMIC_OP(xor, xor,  i)
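Sorry, the ATOMIC_OP macro above is wrong: it expands #op instead of
#asm_op, so arch_atomic_sub() issues "sub" with -i and ends up adding.
It should be fixed up by: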

/*
 * ATOMIC_OP(op) - generate arch_atomic_<op>(int i, atomic_t *v).
 *
 * The generated function returns nothing. Its body is one retry loop:
 *   - ldex.w loads v->counter into tmp;
 *   - the instruction named by "op" combines tmp with i (the macro argument
 *     is stringified into the asm text, so it must be a valid assembler
 *     mnemonic as well as a valid function-name suffix);
 *   - stex.w stores tmp back to v->counter;
 *   - bez branches back to label 1 while tmp is zero after stex.w.
 *     Presumably stex.w overwrites its source register with a success flag
 *     (zero meaning the exclusive store failed) -- confirm against the
 *     C-SKY ISA manual.
 *
 * tmp is an early-clobber output ("=&r") so the compiler will not place it
 * in the same register as either input operand. The "memory" clobber makes
 * the statement a compiler barrier; no fence instruction is emitted here.
 *
 * NOTE(review): the quoted ATOMIC_FETCH_OP in the original patch also pastes
 * #op while being passed I = -i for "sub"; it looks like it needs the same
 * treatment as this macro -- confirm.
 */
#define ATOMIC_OP(op)                                                   \
static __always_inline                                                  \
void arch_atomic_##op(int i, atomic_t *v)                               \
{                                                                       \
        unsigned long tmp;                                              \
        __asm__ __volatile__ (                                          \
        "1:     ldex.w          %0, (%2)        \n"                     \
        "       " #op "         %0, %1          \n"                     \
        "       stex.w          %0, (%2)        \n"                     \
        "       bez             %0, 1b          \n"                     \
        : "=&r" (tmp)                                                   \
        : "r" (i), "r" (&v->counter)                                    \
        : "memory");                                                    \
}

/* Instantiate arch_atomic_add/sub/and/or/xor, one per mnemonic. */
ATOMIC_OP(add)
ATOMIC_OP(sub)
ATOMIC_OP(and)
ATOMIC_OP( or)
ATOMIC_OP(xor)


> +
> +#undef ATOMIC_OP
> +
> +#define ATOMIC_FETCH_OP(op, asm_op, I)                                 \
> +static __always_inline                                                 \
> +int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v)               \
> +{                                                                      \
> +       register int ret, tmp;                                          \
> +       __asm__ __volatile__ (                                          \
> +       "1:     ldex.w          %0, (%3) \n"                            \
> +       "       mov             %1, %0   \n"                            \
> +       "       " #op "         %0, %2   \n"                            \
> +       "       stex.w          %0, (%3) \n"                            \
> +       "       bez             %0, 1b   \n"                            \
> +               : "=&r" (tmp), "=&r" (ret)                              \
> +               : "r" (I), "r"(&v->counter)                             \
> +               : "memory");                                            \
> +       return ret;                                                     \
> +}
> +
> +#define ATOMIC_OP_RETURN(op, asm_op, c_op, I)                          \
> +static __always_inline                                                 \
> +int arch_atomic_##op##_return_relaxed(int i, atomic_t *v)              \
> +{                                                                      \
> +        return arch_atomic_fetch_##op##_relaxed(i, v) c_op I;          \
> +}
> +
> +#define ATOMIC_OPS(op, asm_op, c_op, I)                                        \
> +        ATOMIC_FETCH_OP( op, asm_op,       I)                          \
> +        ATOMIC_OP_RETURN(op, asm_op, c_op, I)
> +
> +ATOMIC_OPS(add, add, +,  i)
> +ATOMIC_OPS(sub, add, +, -i)
> +
> +#define arch_atomic_fetch_add_relaxed  arch_atomic_fetch_add_relaxed
> +#define arch_atomic_fetch_sub_relaxed  arch_atomic_fetch_sub_relaxed
> +
> +#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
> +#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
> +
> +#undef ATOMIC_OPS
> +#undef ATOMIC_OP_RETURN
> +
> +#define ATOMIC_OPS(op, asm_op, I)                                      \
> +        ATOMIC_FETCH_OP(op, asm_op, I)
> +
> +ATOMIC_OPS(and, and, i)
> +ATOMIC_OPS( or,  or, i)
> +ATOMIC_OPS(xor, xor, i)
> +
> +#define arch_atomic_fetch_and_relaxed  arch_atomic_fetch_and_relaxed
> +#define arch_atomic_fetch_or_relaxed   arch_atomic_fetch_or_relaxed
> +#define arch_atomic_fetch_xor_relaxed  arch_atomic_fetch_xor_relaxed
> +
> +#undef ATOMIC_OPS
> +
> +#undef ATOMIC_FETCH_OP
> +
> +#define ATOMIC_OP()                                                    \
> +static __always_inline                                                 \
> +int arch_atomic_xchg_relaxed(atomic_t *v, int n)                       \
> +{                                                                      \
> +       return __xchg_relaxed(n, &(v->counter), 4);                     \
> +}                                                                      \
> +static __always_inline                                                 \
> +int arch_atomic_cmpxchg_relaxed(atomic_t *v, int o, int n)             \
> +{                                                                      \
> +       return __cmpxchg_relaxed(&(v->counter), o, n, 4);               \
> +}
> +
> +#define ATOMIC_OPS()                                                   \
> +       ATOMIC_OP()
> +
> +ATOMIC_OPS()
> +
> +#define arch_atomic_xchg_relaxed       arch_atomic_xchg_relaxed
> +#define arch_atomic_cmpxchg_relaxed    arch_atomic_cmpxchg_relaxed
> +
> +#undef ATOMIC_OPS
> +#undef ATOMIC_OP
> +
> +#else
> +# include <asm-generic/atomic.h>
> +#endif
> +
> +#endif /* __ASM_CSKY_ATOMIC_H */
> --
> 2.25.1
>


-- 
Best Regards
 Guo Ren

ML: https://lore.kernel.org/linux-csky/