For memory accesses with write-combining attributes (e.g. those returned by ioremap_wc()), the CPU may wait for prior accesses to be merged with subsequent ones. But in some situation, such wait is bad for the performance. We introduce io_stop_wc() to prevent the merging of write-combining memory accesses before this macro with those after it. We add implementation for ARM64 using DGH instruction and provide NOP implementation for other architectures. Signed-off-by: Xiongfeng Wang <wangxiongfeng2@xxxxxxxxxx> Suggested-by: Will Deacon <will@xxxxxxxxxx> Suggested-by: Catalin Marinas <catalin.marinas@xxxxxxx> --- v1->v2: change 'Normal-Non Cacheable' to 'write-combining' --- Documentation/memory-barriers.txt | 8 ++++++++ arch/arm64/include/asm/barrier.h | 9 +++++++++ include/asm-generic/barrier.h | 11 +++++++++++ 3 files changed, 28 insertions(+) diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 7367ada13208..b12df9137e1c 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1950,6 +1950,14 @@ There are some more advanced barrier functions: For load from persistent memory, existing read memory barriers are sufficient to ensure read ordering. + (*) io_stop_wc(); + + For memory accesses with write-combining attributes (e.g. those returned + by ioremap_wc(), the CPU may wait for prior accesses to be merged with + subsequent ones. io_stop_wc() can be used to prevent the merging of + write-combining memory accesses before this macro with those after it when + such wait has performance implications. + =============================== IMPLICIT KERNEL MEMORY BARRIERS =============================== diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 1c5a00598458..62217be36217 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -26,6 +26,14 @@ #define __tsb_csync() asm volatile("hint #18" : : : "memory") #define csdb() asm volatile("hint #20" : : : "memory") +/* + * Data Gathering Hint: + * This instruction prevents merging memory accesses with Normal-NC or + * Device-GRE attributes before the hint instruction with any memory accesses + * appearing after the hint instruction. + */ +#define dgh() asm volatile("hint #6" : : : "memory") + #ifdef CONFIG_ARM64_PSEUDO_NMI #define pmr_sync() \ do { \ @@ -46,6 +54,7 @@ #define dma_rmb() dmb(oshld) #define dma_wmb() dmb(oshst) +#define io_stop_wc() dgh() #define tsb_csync() \ do { \ diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 640f09479bdf..4c2c1b830344 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -251,5 +251,16 @@ do { \ #define pmem_wmb() wmb() #endif +/* + * ioremap_wc() maps I/O memory as memory with write-combining attributes. For + * this kind of memory accesses, the CPU may wait for prior accesses to be + * merged with subsequent ones. In some situation, such wait is bad for the + * performance. io_stop_wc() can be used to prevent the merging of + * write-combining memory accesses before this macro with those after it. + */ +#ifndef io_stop_wc +#define io_stop_wc do { } while (0) +#endif + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_GENERIC_BARRIER_H */ -- 2.20.1