[PATCH 02/12] ARM: io: access 32 bits at once for aligned I/O memcpy/memset

Ahmad Fatoum <a.fatoum@xxxxxxxxxxxxxx> · Wed, 13 Mar 2024 11:56:21 +0100

The barebox arm32 implementation of I/O memcpy/memset exclusively uses
single-byte accesses. This differs from both the barebox arm64
implementation, which accesses 64 bits at once if the buffer is aligned,
and the Linux arm32 implementation, an optimized assembly version that
likewise does not use single-byte accesses for aligned buffers.

The current implementation is slower than it needs to be and breaks code
ported from Linux. For example, the OMAP RNG driver uses memcpy_fromio and
expects it to perform 32-bit accesses, as any smaller access leads to a
data abort on the hardware. In Linux this works, but in barebox it crashes.

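Avoid these issues by using 32-bit accesses whenever the I/O address is
4-byte aligned and at least four bytes remain; any unaligned leading bytes
and trailing bytes are still accessed one byte at a time.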

Signed-off-by: Ahmad Fatoum <a.fatoum@xxxxxxxxxxxxxx>
---
 arch/arm/lib32/io.c | 73 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 57 insertions(+), 16 deletions(-)

diff --git a/arch/arm/lib32/io.c b/arch/arm/lib32/io.c
index a12da49c0ab2..780b1083a641 100644
--- a/arch/arm/lib32/io.c
+++ b/arch/arm/lib32/io.c
@@ -2,48 +2,89 @@
 
 #include <module.h>
 #include <linux/types.h>
+#include <asm/unaligned.h>
 #include <io.h>
 
 /*
  * Copy data from IO memory space to "real" memory space.
- * This needs to be optimized.
  */
 void memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
 {
-	unsigned char *t = to;
-	while (count) {
-		count--;
-		*t = readb(from);
-		t++;
+	while (count && !PTR_IS_ALIGNED(from, 4)) {
+		*(u8 *)to = __raw_readb(from);
 		from++;
+		to++;
+		count--;
+	}
+
+	while (count >= 4) {
+		put_unaligned(__raw_readl(from), (u32 *)to);
+		from += 4;
+		to += 4;
+		count -= 4;
+	}
+
+	while (count) {
+		*(u8 *)to = __raw_readb(from);
+		from++;
+		to++;
+		count--;
 	}
 }
 
 /*
  * Copy data from "real" memory space to IO memory space.
- * This needs to be optimized.
  */
 void memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
 {
-	const unsigned char *f = from;
-	while (count) {
-		count--;
-		writeb(*f, to);
-		f++;
+	while (count && !IS_ALIGNED((unsigned long)to, 4)) {
+		__raw_writeb(*(u8 *)from, to);
+		from++;
 		to++;
+		count--;
+	}
+
+	while (count >= 4) {
+		__raw_writel(get_unaligned((u32 *)from), to);
+		from += 4;
+		to += 4;
+		count -= 4;
+	}
+
+	while (count) {
+		__raw_writeb(*(u8 *)from, to);
+		from++;
+		to++;
+		count--;
 	}
 }
 
 /*
  * "memset" on IO memory space.
- * This needs to be optimized.
  */
 void memset_io(volatile void __iomem *dst, int c, size_t count)
 {
-	while (count) {
-		count--;
-		writeb(c, dst);
+	u32 qc = (u8)c;
+
+	qc |= qc << 8;
+	qc |= qc << 16;
+
+	while (count && !PTR_IS_ALIGNED(dst, 4)) {
+		__raw_writeb(c, dst);
 		dst++;
+		count--;
+	}
+
+	while (count >= 4) {
+		__raw_writel(qc, dst);
+		dst += 4;
+		count -= 4;
+	}
+
+	while (count) {
+		__raw_writeb(c, dst);
+		dst++;
+		count--;
 	}
 }
 
-- 
2.39.2