From: Matteo Croce <mcroce@xxxxxxxxxxxxx> Rewrite the generic memcpy() to copy a word at time, without generating unaligned accesses. The procedure is made of three steps: First copy data one byte at time until the destination buffer is aligned to a long boundary. Then copy the data one long at time shifting the current and the next long to compose a long at every cycle. Finally, copy the remainder one byte at time. Signed-off-by: Matteo Croce <mcroce@xxxxxxxxxxxxx> --- lib/string.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 3 deletions(-) diff --git a/lib/string.c b/lib/string.c index 546d59711a12..15e906f97d9e 100644 --- a/lib/string.c +++ b/lib/string.c @@ -33,6 +33,24 @@ #include <asm/word-at-a-time.h> #include <asm/page.h> +#define MIN_THRESHOLD (sizeof(long) * 2) + +/* convenience union to avoid cast between different pointer types */ +union types { + u8 *as_u8; + unsigned long *as_ulong; + uintptr_t as_uptr; +}; + +union const_types { + const u8 *as_u8; + const unsigned long *as_ulong; + uintptr_t as_uptr; +}; + +static const unsigned int bytes_long = sizeof(long); +static const unsigned int word_mask = bytes_long - 1; + #ifndef __HAVE_ARCH_STRNCASECMP /** * strncasecmp - Case insensitive, length-limited string comparison @@ -878,16 +896,73 @@ EXPORT_SYMBOL(memset64); * You should not use this function to access IO space, use memcpy_toio() * or memcpy_fromio() instead. */ + +#ifdef __BIG_ENDIAN +#define MERGE_UL(h, l, d) ((h) << ((d) * 8) | (l) >> ((bytes_long - (d)) * 8)) +#else +#define MERGE_UL(h, l, d) ((h) >> ((d) * 8) | (l) << ((bytes_long - (d)) * 8)) +#endif + void *memcpy(void *dest, const void *src, size_t count) { - char *tmp = dest; - const char *s = src; + union const_types s = { .as_u8 = src }; + union types d = { .as_u8 = dest }; + int distance = 0; + + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) { + if (count < MIN_THRESHOLD) + goto copy_remainder; + + /* Copy a byte at time until destination is aligned. */ + for (; d.as_uptr & word_mask; count--) + *d.as_u8++ = *s.as_u8++; + + distance = s.as_uptr & word_mask; + } + if (distance) { + unsigned long last, next; + + /* + * s is distance bytes ahead of d, and d just reached + * the alignment boundary. Move s backward to word align it + * and shift data to compensate for distance, in order to do + * word-by-word copy. + */ + s.as_u8 -= distance; + + next = s.as_ulong[0]; + for (; count >= bytes_long + word_mask; count -= bytes_long) { + last = next; + next = s.as_ulong[1]; + + d.as_ulong[0] = MERGE_UL(last, next, distance); + + d.as_ulong++; + s.as_ulong++; + } + + /* Restore s with the original offset. */ + s.as_u8 += distance; + } else { + /* + * If the source and dest lower bits are the same, do a simple + * 32/64 bit wide copy. + */ + for (; count >= bytes_long; count -= bytes_long) + *d.as_ulong++ = *s.as_ulong++; + } + +copy_remainder: while (count--) - *tmp++ = *s++; + *d.as_u8++ = *s.as_u8++; + return dest; } EXPORT_SYMBOL(memcpy); + +#undef MERGE_UL + #endif #ifndef __HAVE_ARCH_MEMMOVE -- 2.31.1