From: Julian Vetter > Sent: 25 September 2024 14:24 > > Various architectures have almost the same implementations for > __memcpy_{to,from}io and __memset_io functions. So, consolidate them > into the existing lib/iomap_copy.c. > > Reviewed-by: Yann Sionneau <ysionneau@xxxxxxxxxxxxx> > Signed-off-by: Julian Vetter <jvetter@xxxxxxxxxxxxx> > --- > Signed-off-by: Julian Vetter <jvetter@xxxxxxxxxxxxx> > --- > Changes for v6: > - Included linux/aslign.h > - Replaced compile time check by ifdef to remove compiler warning > --- > include/asm-generic/io.h | 12 +++++ > lib/iomap_copy.c | 109 +++++++++++++++++++++++++++++++++++++++ > 2 files changed, 121 insertions(+) > > diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h > index 80de699bf6af..9b8e0449da28 100644 > --- a/include/asm-generic/io.h > +++ b/include/asm-generic/io.h > @@ -102,6 +102,18 @@ static inline void log_post_read_mmio(u64 val, u8 width, const volatile void __i > > #endif /* CONFIG_TRACE_MMIO_ACCESS */ > > +#ifndef __memcpy_fromio > +void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count); > +#endif > + > +#ifndef __memcpy_toio > +void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count); > +#endif > + > +#ifndef __memset_io > +void __memset_io(volatile void __iomem *dst, int c, size_t count); > +#endif > + > /* > * __raw_{read,write}{b,w,l,q}() access memory in native endianness. > * > diff --git a/lib/iomap_copy.c b/lib/iomap_copy.c > index 2fd5712fb7c0..c2cee6410151 100644 > --- a/lib/iomap_copy.c > +++ b/lib/iomap_copy.c > @@ -3,9 +3,15 @@ > * Copyright 2006 PathScale, Inc. All Rights Reserved. > */ > > +#include <asm/unaligned.h> > + > +#include <linux/align.h> > #include <linux/export.h> > +#include <linux/types.h> > #include <linux/io.h> > > +#define NATIVE_STORE_SIZE (BITS_PER_LONG/8) (sizeof (long)) > + > /** > * __iowrite32_copy - copy data to MMIO space, in 32-bit units > * @to: destination, in MMIO space (must be 32-bit aligned) > @@ -76,3 +82,106 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count) > } > EXPORT_SYMBOL_GPL(__iowrite64_copy); > #endif > + > + > +#ifndef __memcpy_fromio > +void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) > +{ > + while (count && !IS_ALIGNED((unsigned long)from, NATIVE_STORE_SIZE)) { > + *(u8 *)to = __raw_readb(from); > + from++; > + to++; > + count--; > + } > + > + while (count >= NATIVE_STORE_SIZE) { > +#ifdef CONFIG_64BIT > + put_unaligned(__raw_readq(from), (uintptr_t *)to); > +#else > + put_unaligned(__raw_readl(from), (uintptr_t *)to); > +#endif That looks horrid to me. You seem to be mixing several different types and tests. NATIVE_STORE_SIZE is based on the 'long ' type (indirectly and by assumption). CONFIG_64BiIT (probably) implies LP64. readl() reads 4 bytes and readq() 8 (for both 32bit and 64bit kernels) uintptr is an unsigned integer large enough to hold a pointer. The sizes might all happen to match, but there is no need to rely on all of them. I might be best to just use 'sizeof (long)' except that you might get a compile error on some 32bit archs for the: long val = sizeof (val) == 8) ? readq(from) : readl(from); put_unaligned(val, (long *)to); (due to there being no declaration readq()) so it might need a #if somewhere. OTOH there might always be an 'extern' for readq(). If you are using the __raw_readx() functions don't you need the synchronisation barriers top and bottom? Also if put_unaligned() is non-trivial the code will be horrid. An initial test for ((to | from) & (sizeof (long) - 1) == 0) for an aligned copy may be worthwhile. There is the question of whether the code is allowed to do full word reads - valid if the io area behaves like memory. In which case you don't want to do byte transfers for alignment and tail transfers - just read the full word that contains the data. PCIe reads can be horribly slow (writes are 'posted' so much better). I'm not sure how long they take into a 'normal' target, but back to back reads into our fpga are about 128 clocks apart on its internal 125Mhz clock - the host cpu will stall for the entire period. So you definitely want to use the largest register possible. (Or try very hard to never do non-dma reads in either direction.) David > + > + from += NATIVE_STORE_SIZE; > + to += NATIVE_STORE_SIZE; > + count -= NATIVE_STORE_SIZE; > + } > + > + while (count) { > + *(u8 *)to = __raw_readb(from); > + from++; > + to++; > + count--; > + } > +} > +EXPORT_SYMBOL(__memcpy_fromio); > +#endif > + > +#ifndef __memcpy_toio > +void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count) > +{ > + while (count && !IS_ALIGNED((unsigned long)to, NATIVE_STORE_SIZE)) { > + __raw_writeb(*(u8 *)from, to); > + from++; > + to++; > + count--; > + } > + > + while (count >= NATIVE_STORE_SIZE) { > +#ifdef CONFIG_64BIT > + __raw_writeq(get_unaligned((uintptr_t *)from), to); > +#else > + __raw_writel(get_unaligned((uintptr_t *)from), to); > +#endif > + > + from += NATIVE_STORE_SIZE; > + to += NATIVE_STORE_SIZE; > + count -= NATIVE_STORE_SIZE; > + } > + > + while (count) { > + __raw_writeb(*(u8 *)from, to); > + from++; > + to++; > + count--; > + } > +} > +EXPORT_SYMBOL(__memcpy_toio); > +#endif > + > +#ifndef __memset_io > +void __memset_io(volatile void __iomem *dst, int c, size_t count) > +{ > + uintptr_t qc = (u8)c; > + > + qc |= qc << 8; > + qc |= qc << 16; > + > +#ifdef CONFIG_64BIT > + qc |= qc << 32; > +#endif > + > + while (count && !IS_ALIGNED((unsigned long)dst, NATIVE_STORE_SIZE)) { > + __raw_writeb(c, dst); > + dst++; > + count--; > + } > + > + while (count >= NATIVE_STORE_SIZE) { > +#ifdef CONFIG_64BIT > + __raw_writeq(qc, dst); > +#else > + __raw_writel(qc, dst); > +#endif > + > + dst += NATIVE_STORE_SIZE; > + count -= NATIVE_STORE_SIZE; > + } > + > + while (count) { > + __raw_writeb(c, dst); > + dst++; > + count--; > + } > +} > +EXPORT_SYMBOL(__memset_io); > +#endif > -- > 2.34.1 > > > > > - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)