The barebox arm32 implementation of I/O memcpy/memset uses single-byte accesses exclusively. This differs both from the barebox arm64 implementation, which accesses 64 bits at once if the buffer is aligned, and from the Linux arm32 implementation, which is the optimized assembly version that doesn't use single-byte accesses for aligned buffers either. The current implementation is slower than it needs to be and breaks code ported from Linux. For example, the OMAP RNG driver uses memcpy_fromio and expects it to perform 32-bit accesses, as any smaller access leads to a data abort on the hardware. In Linux this works, but in barebox it crashes. Avoid these issues by using 32-bit accesses where possible. Signed-off-by: Ahmad Fatoum <a.fatoum@xxxxxxxxxxxxxx> --- arch/arm/lib32/io.c | 73 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 16 deletions(-) diff --git a/arch/arm/lib32/io.c b/arch/arm/lib32/io.c index a12da49c0ab2..780b1083a641 100644 --- a/arch/arm/lib32/io.c +++ b/arch/arm/lib32/io.c @@ -2,48 +2,89 @@ #include <module.h> #include <linux/types.h> +#include <asm/unaligned.h> #include <io.h> /* * Copy data from IO memory space to "real" memory space. - * This needs to be optimized. */ void memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) { - unsigned char *t = to; - while (count) { - count--; - *t = readb(from); - t++; + while (count && !PTR_IS_ALIGNED(from, 4)) { + *(u8 *)to = __raw_readb(from); from++; + to++; + count--; + } + + while (count >= 4) { + put_unaligned(__raw_readl(from), (u32 *)to); + from += 4; + to += 4; + count -= 4; + } + + while (count) { + *(u8 *)to = __raw_readb(from); + from++; + to++; + count--; } } /* * Copy data from "real" memory space to IO memory space. - * This needs to be optimized. 
*/ void memcpy_toio(volatile void __iomem *to, const void *from, size_t count) { - const unsigned char *f = from; - while (count) { - count--; - writeb(*f, to); - f++; + while (count && !IS_ALIGNED((unsigned long)to, 4)) { + __raw_writeb(*(u8 *)from, to); + from++; to++; + count--; + } + + while (count >= 4) { + __raw_writel(get_unaligned((u32 *)from), to); + from += 4; + to += 4; + count -= 4; + } + + while (count) { + __raw_writeb(*(u8 *)from, to); + from++; + to++; + count--; } } /* * "memset" on IO memory space. - * This needs to be optimized. */ void memset_io(volatile void __iomem *dst, int c, size_t count) { - while (count) { - count--; - writeb(c, dst); + u32 qc = (u8)c; + + qc |= qc << 8; + qc |= qc << 16; + + while (count && !PTR_IS_ALIGNED(dst, 4)) { + __raw_writeb(c, dst); dst++; + count--; + } + + while (count >= 4) { + __raw_writel(qc, dst); + dst += 4; + count -= 4; + } + + while (count) { + __raw_writeb(c, dst); + dst++; + count--; } } -- 2.39.2