On Tue, Jan 17, 2023 at 04:02:06PM +0100, Peter Zijlstra wrote: > On Tue, Jan 17, 2023 at 04:57:03PM +0300, Kirill A. Shutemov wrote: > > On Tue, Jan 17, 2023 at 02:05:22PM +0100, Peter Zijlstra wrote: > > > On Wed, Jan 11, 2023 at 03:37:27PM +0300, Kirill A. Shutemov wrote: > > > > > > > #define __untagged_addr(untag_mask, addr) > > > > u64 __addr = (__force u64)(addr); \ > > > > - s64 sign = (s64)__addr >> 63; \ > > > > - __addr &= untag_mask | sign; \ > > > > + if (static_branch_likely(&tagged_addr_key)) { \ > > > > + s64 sign = (s64)__addr >> 63; \ > > > > + __addr &= untag_mask | sign; \ > > > > + } \ > > > > (__force __typeof__(addr))__addr; \ > > > > }) > > > > > > > > #define untagged_addr(addr) __untagged_addr(current_untag_mask(), addr) > > > > > > Is the compiler clever enough to put the memop inside the branch? > > > > Hm. You mean current_untag_mask() inside static_branch_likely()? > > > > But it is preprocessor who does this, not compiler. So, yes, the memop is > > inside the branch. > > > > Or I didn't understand your question. > > Nah, call it a pre-lunch dip, I overlooked the whole CPP angle -- d'0h. > > That said, I did just put it through a compiler to see wth it did and it > is pretty gross: I tried to replace static branch with alternative. It kinda works, but required few hack. Thanks to Andrew Cooper for helping to untangle them. I am not sure if it worth the effort. I don't have any evidence that it helps. untagged_addr() overhead is rather small and hides in noise of syscall cost. I only made alternative for untagged_addr(), but not for untagged_addr_remote(). _remote() case has very few users. BTW, it would be nice to be able to apply alternative later, delaying it until the first user of LAM, like I did with static_branch. We don't have a way to do this right? Any opinions? I am okay dropping the patch altogether. diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index c44b56f7ffba..3f0c31044f02 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -75,6 +75,12 @@ # define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31)) #endif +#ifdef CONFIG_ADDRESS_MASKING +# define DISABLE_LAM 0 +#else +# define DISABLE_LAM (1 << (X86_FEATURE_LAM & 31)) +#endif + #ifdef CONFIG_INTEL_IOMMU_SVM # define DISABLE_ENQCMD 0 #else @@ -115,7 +121,7 @@ #define DISABLED_MASK10 0 #define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \ DISABLE_CALL_DEPTH_TRACKING) -#define DISABLED_MASK12 0 +#define DISABLED_MASK12 (DISABLE_LAM) #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 #define DISABLED_MASK15 0 diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index f9f85d596581..57ccb91fcccf 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -9,6 +9,7 @@ #include <linux/kasan-checks.h> #include <linux/mm_types.h> #include <linux/string.h> +#include <linux/mmap_lock.h> #include <asm/asm.h> #include <asm/page.h> #include <asm/smap.h> @@ -24,28 +25,48 @@ static inline bool pagefault_disabled(void); #endif #ifdef CONFIG_ADDRESS_MASKING -DECLARE_STATIC_KEY_FALSE(tagged_addr_key); +static inline unsigned long __untagged_addr(unsigned long addr) +{ + /* + * Magic with the 'sign' allows to untag userspace pointer without + * any branches while leaving kernel addresses intact. + */ + long sign; + + /* + * Refer tlbstate_untag_mask directly to avoid RIP-relative relocation + * in alternative instructions. The relocation gets wrong when gets + * copied to the target place. + */ + asm (ALTERNATIVE("", + "sar $63, %[sign]\n\t" /* user_ptr ? 0 : -1UL */ + "or %%gs:tlbstate_untag_mask, %[sign]\n\t" + "and %[sign], %[addr]\n\t", X86_FEATURE_LAM) + : [addr] "+r" (addr), [sign] "=r" (sign) + : "m" (tlbstate_untag_mask), "[sign]" (addr)); + + return addr; +} -/* - * Mask out tag bits from the address. - * - * Magic with the 'sign' allows to untag userspace pointer without any branches - * while leaving kernel addresses intact. - */ -#define __untagged_addr(untag_mask, addr) ({ \ - u64 __addr = (__force u64)(addr); \ - if (static_branch_likely(&tagged_addr_key)) { \ - s64 sign = (s64)__addr >> 63; \ - __addr &= untag_mask | sign; \ - } \ - (__force __typeof__(addr))__addr; \ +#define untagged_addr(addr) ({ \ + unsigned long __addr = (__force unsigned long)(addr); \ + (__force __typeof__(addr))__untagged_addr(__addr); \ }) -#define untagged_addr(addr) __untagged_addr(current_untag_mask(), addr) +static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, + unsigned long addr) +{ + long sign = addr >> 63; + + mmap_assert_locked(mm); + addr &= (mm)->context.untag_mask | sign; + + return addr; +} #define untagged_addr_remote(mm, addr) ({ \ - mmap_assert_locked(mm); \ - __untagged_addr((mm)->context.untag_mask, addr); \ + unsigned long __addr = (__force unsigned long)(addr); \ + (__force __typeof__(addr))__untagged_addr_remote(mm, __addr); \ }) #else diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 0831d2be190f..e006725afdf1 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -745,9 +745,6 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr) #ifdef CONFIG_ADDRESS_MASKING -DEFINE_STATIC_KEY_FALSE(tagged_addr_key); -EXPORT_SYMBOL_GPL(tagged_addr_key); - #define LAM_U57_BITS 6 static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits) @@ -787,8 +784,6 @@ static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits) set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags); mmap_write_unlock(mm); - - static_branch_enable(&tagged_addr_key); return 0; } #endif -- Kiryl Shutsemau / Kirill A. Shutemov