Nicolas Pitre <nicolas.pitre@xxxxxxxxxx> writes:

> +static inline uint64_t __arch_xprod_64(uint64_t m, uint64_t n, bool bias)
> +{
> +	unsigned long long res;
> +	unsigned int tmp = 0;
> +
> +	if (!bias) {
> +		asm ( "umull %Q0, %R0, %Q1, %Q2\n\t"
> +			"mov %Q0, #0"
> +			: "=&r" (res)
> +			: "r" (m), "r" (n)
> +			: "cc");
> +	} else if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
> +		res = m;
> +		asm ( "umlal %Q0, %R0, %Q1, %Q2\n\t"
> +			"mov %Q0, #0"
> +			: "+&r" (res)
> +			: "r" (m), "r" (n)
> +			: "cc");
> +	} else {
> +		asm ( "umull %Q0, %R0, %Q2, %Q3\n\t"
> +			"cmn %Q0, %Q2\n\t"
> +			"adcs %R0, %R0, %R2\n\t"
> +			"adc %Q0, %1, #0"
> +			: "=&r" (res), "+&r" (tmp)
> +			: "r" (m), "r" (n)

Why is tmp using a +r constraint here?  The register is not written, so
using an input-only operand could/should result in better code.  That is
also what the old code did.

> +			: "cc");
> +	}
> +
> +	if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
> +		asm ( "umlal %R0, %Q0, %R1, %Q2\n\t"
> +			"umlal %R0, %Q0, %Q1, %R2\n\t"
> +			"mov %R0, #0\n\t"
> +			"umlal %Q0, %R0, %R1, %R2"
> +			: "+&r" (res)
> +			: "r" (m), "r" (n)
> +			: "cc");
> +	} else {
> +		asm ( "umlal %R0, %Q0, %R2, %Q3\n\t"
> +			"umlal %R0, %1, %Q2, %R3\n\t"
> +			"mov %R0, #0\n\t"
> +			"adds %Q0, %1, %Q0\n\t"
> +			"adc %R0, %R0, #0\n\t"
> +			"umlal %Q0, %R0, %R2, %R3"
> +			: "+&r" (res), "+&r" (tmp)
> +			: "r" (m), "r" (n)
> +			: "cc");
> +	}
> +
> +	return res;
> +}

-- 
Måns Rullgård
mans@xxxxxxxxx
--
To unsubscribe from this list: send the line "unsubscribe linux-arch" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html