On 9/3/21 2:41 PM, John David Anglin wrote:
Hi Helge,
I don't believe the compiler can implement __arch_swab64() as efficiently as the inline assembly
code. The compiler doesn't know about the permh or hsh instructions.
Yes, it does.
u64 test(u64 x)
{
return swab64(x);
}
generates:
0000000000000070 <test>:
70: 08 03 02 41 copy r3,r1
74: 08 1e 02 43 copy sp,r3
78: 73 c1 00 88 std,ma r1,40(sp)
7c: fb 5a 69 1f permh,3210 r26,r31
80: f8 1f 8a 1c hshl r31,8,ret0
84: fb e0 ca 1f hshr,u r31,8,r31
88: 0b fc 02 5c or ret0,r31,ret0
8c: 34 7e 00 80 ldo 40(r3),sp
90: e8 40 d0 00 bve (rp)
94: 53 c3 3f 8d ldd,mb -40(sp),r3
Helge
Dave
On 2021-09-03 4:03 a.m., Helge Deller wrote:
No need to keep those as inline assembly functions, the compiler now
generates the same or even better optimized code.
Signed-off-by: Helge Deller <deller@xxxxxx>
diff --git a/arch/parisc/include/uapi/asm/swab.h b/arch/parisc/include/uapi/asm/swab.h
deleted file mode 100644
index 35fb2d1bfbbd..000000000000
--- a/arch/parisc/include/uapi/asm/swab.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _PARISC_SWAB_H
-#define _PARISC_SWAB_H
-
-#include <asm/bitsperlong.h>
-#include <linux/types.h>
-#include <linux/compiler.h>
-
-#define __SWAB_64_THRU_32__
-
-static inline __attribute_const__ __u16 __arch_swab16(__u16 x)
-{
- __asm__("dep %0, 15, 8, %0\n\t" /* deposit 00ab -> 0bab */
- "shd %%r0, %0, 8, %0" /* shift 000000ab -> 00ba */
- : "=r" (x)
- : "0" (x));
- return x;
-}
-#define __arch_swab16 __arch_swab16
-
-static inline __attribute_const__ __u32 __arch_swab24(__u32 x)
-{
- __asm__("shd %0, %0, 8, %0\n\t" /* shift xabcxabc -> cxab */
- "dep %0, 15, 8, %0\n\t" /* deposit cxab -> cbab */
- "shd %%r0, %0, 8, %0" /* shift 0000cbab -> 0cba */
- : "=r" (x)
- : "0" (x));
- return x;
-}
-
-static inline __attribute_const__ __u32 __arch_swab32(__u32 x)
-{
- unsigned int temp;
- __asm__("shd %0, %0, 16, %1\n\t" /* shift abcdabcd -> cdab */
- "dep %1, 15, 8, %1\n\t" /* deposit cdab -> cbab */
- "shd %0, %1, 8, %0" /* shift abcdcbab -> dcba */
- : "=r" (x), "=&r" (temp)
- : "0" (x));
- return x;
-}
-#define __arch_swab32 __arch_swab32
-
-#if __BITS_PER_LONG > 32
-/*
-** From "PA-RISC 2.0 Architecture", HP Professional Books.
-** See Appendix I page 8 , "Endian Byte Swapping".
-**
-** Pretty cool algorithm: (* == zero'd bits)
-** PERMH 01234567 -> 67452301 into %0
-** HSHL 67452301 -> 7*5*3*1* into %1
-** HSHR 67452301 -> *6*4*2*0 into %0
-** OR %0 | %1 -> 76543210 into %0 (all done!)
-*/
-static inline __attribute_const__ __u64 __arch_swab64(__u64 x)
-{
- __u64 temp;
- __asm__("permh,3210 %0, %0\n\t"
- "hshl %0, 8, %1\n\t"
- "hshr,u %0, 8, %0\n\t"
- "or %1, %0, %0"
- : "=r" (x), "=&r" (temp)
- : "0" (x));
- return x;
-}
-#define __arch_swab64 __arch_swab64
-#endif /* __BITS_PER_LONG > 32 */
-
-#endif /* _PARISC_SWAB_H */