On Wed, May 11, 2022 at 09:21:16AM +0200, Peter Zijlstra wrote: > On Wed, May 11, 2022 at 05:27:46AM +0300, Kirill A. Shutemov wrote: > > +#define __untagged_addr(addr, n) \ > > + ((__force __typeof__(addr))sign_extend64((__force u64)(addr), n)) > > + > > +#define untagged_addr(addr) ({ \ > > + u64 __addr = (__force u64)(addr); \ > > + if (__addr >> 63 == 0) { \ > > + if (current->thread.features & X86_THREAD_LAM_U57) \ > > + __addr &= __untagged_addr(__addr, 56); \ > > + else if (current->thread.features & X86_THREAD_LAM_U48) \ > > + __addr &= __untagged_addr(__addr, 47); \ > > + } \ > > + (__force __typeof__(addr))__addr; \ > > +}) > > Assuming you got your bits in hardware order: > > u64 __addr = addr; > if ((s64)__addr >= 0) { > int lam = (current->thread.features >> X86_THREAD_LAM_U57) & 3; That needs a _BIT suffix or something, same in the previous reply. > if (lam) > __addr &= sign_extend64(__addr, 65 - 9*lam); > } > __addr; > > has fewer branches and should definitely result in better code (or I > need more morning juice). I definitely need more morning juice :-)