From: Guo Ren <guoren@xxxxxxxxxxxxxxxxx> Optimize memcpy_{from,to}io() and memset_io() by transferring in 64 bit as much as possible with minimized barrier usage. This simplest optimization brings faster throughput compare to current byte-by-byte read and write with barrier in the loop. Code's skeleton is taken from the powerpc & arm64. Signed-off-by: Guo Ren <guoren@xxxxxxxxxxxxxxxxx> Signed-off-by: Guo Ren <guoren@xxxxxxxxxx> --- Changes in V2: - Fixup compile error by Makefile missing io.o --- arch/csky/include/asm/io.h | 11 +++++ arch/csky/kernel/Makefile | 2 +- arch/csky/kernel/io.c | 91 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 arch/csky/kernel/io.c diff --git a/arch/csky/include/asm/io.h b/arch/csky/include/asm/io.h index f82654053dc0..adb64e26194f 100644 --- a/arch/csky/include/asm/io.h +++ b/arch/csky/include/asm/io.h @@ -32,6 +32,17 @@ #define writel(v,c) ({ wmb(); writel_relaxed((v),(c)); mb(); }) #endif +/* + * String version of I/O memory access operations. + */ +extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t); +extern void __memcpy_toio(volatile void __iomem *, const void *, size_t); +extern void __memset_io(volatile void __iomem *, int, size_t); + +#define memset_io(c,v,l) __memset_io((c),(v),(l)) +#define memcpy_fromio(a,c,l) __memcpy_fromio((a),(c),(l)) +#define memcpy_toio(c,a,l) __memcpy_toio((c),(a),(l)) + /* * I/O memory mapping functions. */ diff --git a/arch/csky/kernel/Makefile b/arch/csky/kernel/Makefile index 6c0f36010ed0..4eb41421ca5b 100644 --- a/arch/csky/kernel/Makefile +++ b/arch/csky/kernel/Makefile @@ -2,7 +2,7 @@ extra-y := head.o vmlinux.lds obj-y += entry.o atomic.o signal.o traps.o irq.o time.o vdso.o vdso/ -obj-y += power.o syscall.o syscall_table.o setup.o +obj-y += power.o syscall.o syscall_table.o setup.o io.o obj-y += process.o cpu-probe.o ptrace.o stacktrace.o obj-y += probes/ diff --git a/arch/csky/kernel/io.c b/arch/csky/kernel/io.c new file mode 100644 index 000000000000..5883f13fa2b1 --- /dev/null +++ b/arch/csky/kernel/io.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/export.h> +#include <linux/types.h> +#include <linux/io.h> + +/* + * Copy data from IO memory space to "real" memory space. + */ +void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) +{ + while (count && !IS_ALIGNED((unsigned long)from, 4)) { + *(u8 *)to = __raw_readb(from); + from++; + to++; + count--; + } + + while (count >= 4) { + *(u32 *)to = __raw_readl(from); + from += 4; + to += 4; + count -= 4; + } + + while (count) { + *(u8 *)to = __raw_readb(from); + from++; + to++; + count--; + } +} +EXPORT_SYMBOL(__memcpy_fromio); + +/* + * Copy data from "real" memory space to IO memory space. + */ +void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count) +{ + while (count && !IS_ALIGNED((unsigned long)to, 4)) { + __raw_writeb(*(u8 *)from, to); + from++; + to++; + count--; + } + + while (count >= 4) { + __raw_writel(*(u32 *)from, to); + from += 4; + to += 4; + count -= 4; + } + + while (count) { + __raw_writeb(*(u8 *)from, to); + from++; + to++; + count--; + } +} +EXPORT_SYMBOL(__memcpy_toio); + +/* + * "memset" on IO memory space. + */ +void __memset_io(volatile void __iomem *dst, int c, size_t count) +{ + u32 qc = (u8)c; + + qc |= qc << 8; + qc |= qc << 16; + + while (count && !IS_ALIGNED((unsigned long)dst, 4)) { + __raw_writeb(c, dst); + dst++; + count--; + } + + while (count >= 4) { + __raw_writel(qc, dst); + dst += 4; + count -= 4; + } + + while (count) { + __raw_writeb(c, dst); + dst++; + count--; + } +} +EXPORT_SYMBOL(__memset_io); -- 2.25.1