On Fri, Sep 17, 2021 at 11:03:20AM -0700, Peter Oskolkov wrote:
> Add helper functions to work atomically with userspace 32/64 bit values -
> there are some .*futex.* named helpers, but they are not exactly
> what is needed for UMCG; I haven't found what else I could use, so I
> rolled these.
>
> At the moment only X86_64 is supported.
>
> Note: the helpers should probably go into arch/ somewhere; I have
> them in kernel/sched/umcg_uaccess.h temporarily for convenience. Please
> let me know where I should put them.
>
> Changelog:
> v0.5->v0.6:
> - replaced mmap_read_lock with mmap_read_lock_killable in fix_pagefault();
> - fix_pagefault now validates proper uaddr alignment;
> - renamed umcg.h to umcg_uaccess.h;
> v0.4->v0.5:
> - added xchg_user_** helpers;
> v0.3->v0.4:
> - added put_user_nosleep;
> - removed linked list/stack operations patch;
> v0.2->v0.3:
> - renamed and refactored the helpers a bit, as described above;
> - moved linked list/stack operations into a separate patch.
>
> Signed-off-by: Peter Oskolkov <posk@xxxxxxxxxx>
> ---
>  kernel/sched/umcg_uaccess.h | 344 ++++++++++++++++++++++++++++++++++++
>  1 file changed, 344 insertions(+)
>  create mode 100644 kernel/sched/umcg_uaccess.h
>
> diff --git a/kernel/sched/umcg_uaccess.h b/kernel/sched/umcg_uaccess.h
> new file mode 100644
> index 000000000000..e4ead8d2fd62
> --- /dev/null
> +++ b/kernel/sched/umcg_uaccess.h
> @@ -0,0 +1,344 @@
> +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
> +#ifndef _KERNEL_SCHED_UMCG_UACCESS_H
> +#define _KERNEL_SCHED_UMCG_UACCESS_H
> +
> +#ifdef CONFIG_X86_64
> +
> +#include <linux/uaccess.h>
> +
> +#include <asm/asm.h>
> +#include <linux/atomic.h>
> +#include <asm/uaccess.h>
> +
> +/* TODO: move atomic operations below into arch/ headers */
> +static inline int __try_cmpxchg_user_32(u32 *uval, u32 __user *uaddr,
> +					u32 oldval, u32 newval)
> +{
> +	int ret = 0;
> +
> +	asm volatile("\n"
> +		"1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"
> +		"2:\n"
> +		"\t.section .fixup, \"ax\"\n"
> +		"3:\tmov %3, %0\n"
> +		"\tjmp 2b\n"
> +		"\t.previous\n"
> +		_ASM_EXTABLE_UA(1b, 3b)
> +		: "+r" (ret), "=a" (oldval), "+m" (*uaddr)
> +		: "i" (-EFAULT), "r" (newval), "1" (oldval)
> +		: "memory"
> +	);
> +	*uval = oldval;
> +	return ret;
> +}
> +
> +static inline int __try_cmpxchg_user_64(u64 *uval, u64 __user *uaddr,
> +					u64 oldval, u64 newval)
> +{
> +	int ret = 0;
> +
> +	asm volatile("\n"
> +		"1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n"
> +		"2:\n"
> +		"\t.section .fixup, \"ax\"\n"
> +		"3:\tmov %3, %0\n"
> +		"\tjmp 2b\n"
> +		"\t.previous\n"
> +		_ASM_EXTABLE_UA(1b, 3b)
> +		: "+r" (ret), "=a" (oldval), "+m" (*uaddr)
> +		: "i" (-EFAULT), "r" (newval), "1" (oldval)
> +		: "memory"
> +	);
> +	*uval = oldval;
> +	return ret;
> +}
> +
> +static inline int fix_pagefault(unsigned long uaddr, bool write_fault, int bytes)
> +{
> +	struct mm_struct *mm = current->mm;
> +	int ret;
> +
> +	/* Validate proper alignment. */
> +	if (uaddr % bytes)
> +		return -EINVAL;
> +
> +	if (mmap_read_lock_killable(mm))
> +		return -EINTR;
> +	ret = fixup_user_fault(mm, uaddr, write_fault ? FAULT_FLAG_WRITE : 0,
> +			       NULL);
> +	mmap_read_unlock(mm);
> +
> +	return ret < 0 ? ret : 0;
> +}
> +
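A question here, to check my reading of the calling convention rather
than a further comment on the code: since the sleeping variants below
fix the fault up internally via fix_pagefault(), I assume a caller only
ever has to loop on -EAGAIN, with *old refreshed to the observed value
on each failure. Roughly like this hypothetical set_user_flag() (names
made up for illustration, not part of this patch):

	static int set_user_flag(u64 __user *uaddr, u64 flag)
	{
		u64 cur = 0, next;
		int ret;

		/* Prime 'cur' with the current value; may sleep. */
		ret = get_user(cur, uaddr);
		if (ret)
			return ret;

		do {
			next = cur | flag;
			/* On -EAGAIN, 'cur' now holds the observed value. */
			ret = cmpxchg_user_64(uaddr, &cur, next);
		} while (ret == -EAGAIN);

		return ret;	/* 0 or -EFAULT */
	}

Is that the intended usage?
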
> +/**
> + * cmpxchg_32_user_nosleep - compare_exchange 32-bit values
       ^^^^^^^^^^^^^^^^^^^^^^^

Need to be consistent with the function name below. The same goes for
the "@expected" and "@prev" references in the Return: sections
throughout this file - the actual parameters are named @old and @new.

> + * Return:
> + * 0 - OK
> + * -EFAULT: memory access error
> + * -EAGAIN: @expected did not match; consult @prev
> + */
> +static inline int cmpxchg_user_32_nosleep(u32 __user *uaddr, u32 *old, u32 new)
> +{
> +	int ret = -EFAULT;
> +	u32 __old = *old;
> +
> +	if (unlikely(!access_ok(uaddr, sizeof(*uaddr))))
> +		return -EFAULT;
> +
> +	pagefault_disable();
> +
> +	__uaccess_begin_nospec();
> +	ret = __try_cmpxchg_user_32(old, uaddr, __old, new);
> +	user_access_end();
> +
> +	if (!ret)
> +		ret = *old == __old ? 0 : -EAGAIN;
> +
> +	pagefault_enable();
> +	return ret;
> +}
> +
> +/**
> + * cmpxchg_64_user_nosleep - compare_exchange 64-bit values
       ^^^^^^^^^^^^^^^^^^^^^^^

Ditto.

> + * Return:
> + * 0 - OK
> + * -EFAULT: memory access error
> + * -EAGAIN: @expected did not match; consult @prev
> + */
> +static inline int cmpxchg_user_64_nosleep(u64 __user *uaddr, u64 *old, u64 new)
> +{
> +	int ret = -EFAULT;
> +	u64 __old = *old;
> +
> +	if (unlikely(!access_ok(uaddr, sizeof(*uaddr))))
> +		return -EFAULT;
> +
> +	pagefault_disable();
> +
> +	__uaccess_begin_nospec();
> +	ret = __try_cmpxchg_user_64(old, uaddr, __old, new);
> +	user_access_end();
> +
> +	if (!ret)
> +		ret = *old == __old ? 0 : -EAGAIN;
> +
> +	pagefault_enable();
> +
> +	return ret;
> +}
> +
> +/**
> + * cmpxchg_32_user - compare_exchange 32-bit values
       ^^^^^^^^^^^^^^^

Ditto.

> + * Return:
> + * 0 - OK
> + * -EFAULT: memory access error
> + * -EAGAIN: @expected did not match; consult @prev
> + */
> +static inline int cmpxchg_user_32(u32 __user *uaddr, u32 *old, u32 new)
> +{
> +	int ret = -EFAULT;
> +	u32 __old = *old;
> +
> +	if (unlikely(!access_ok(uaddr, sizeof(*uaddr))))
> +		return -EFAULT;
> +
> +	pagefault_disable();
> +
> +	while (true) {
> +		__uaccess_begin_nospec();
> +		ret = __try_cmpxchg_user_32(old, uaddr, __old, new);
> +		user_access_end();
> +
> +		if (!ret) {
> +			ret = *old == __old ? 0 : -EAGAIN;
> +			break;
> +		}
> +
> +		if (fix_pagefault((unsigned long)uaddr, true, sizeof(*uaddr)) < 0)
> +			break;
> +	}
> +
> +	pagefault_enable();
> +	return ret;
> +}
> +
> +/**
> + * cmpxchg_64_user - compare_exchange 64-bit values
       ^^^^^^^^^^^^^^^

Ditto.

> + * Return:
> + * 0 - OK
> + * -EFAULT: memory access error
> + * -EAGAIN: @expected did not match; consult @prev
> + */
> +static inline int cmpxchg_user_64(u64 __user *uaddr, u64 *old, u64 new)
> +{
> +	int ret = -EFAULT;
> +	u64 __old = *old;
> +
> +	if (unlikely(!access_ok(uaddr, sizeof(*uaddr))))
> +		return -EFAULT;
> +
> +	pagefault_disable();
> +
> +	while (true) {
> +		__uaccess_begin_nospec();
> +		ret = __try_cmpxchg_user_64(old, uaddr, __old, new);
> +		user_access_end();
> +
> +		if (!ret) {
> +			ret = *old == __old ? 0 : -EAGAIN;
> +			break;
> +		}
> +
> +		if (fix_pagefault((unsigned long)uaddr, true, sizeof(*uaddr)) < 0)
> +			break;
> +	}
> +
> +	pagefault_enable();
> +
> +	return ret;
> +}
> +
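And a similar understanding-check for the _nosleep variants above:
since they cannot fix the fault up themselves, I take it the intended
pattern is to try the update from the atomic context and punt to a
sleepable path on -EFAULT. Something like this hypothetical sketch
(made-up function and parameter names, just to confirm my reading):

	/* Hypothetical: called with preemption disabled. */
	static int try_update_state_atomic(u32 __user *state, u32 from, u32 to)
	{
		u32 old = from;
		int ret;

		ret = cmpxchg_user_32_nosleep(state, &old, to);
		if (ret == -EFAULT) {
			/*
			 * Page not present, and we may not sleep here:
			 * the caller must retry from a sleepable context,
			 * e.g. via cmpxchg_user_32(), which calls
			 * fix_pagefault() and retries internally.
			 */
			return -EFAULT;
		}

		/* 0, or -EAGAIN with 'old' holding the observed value. */
		return ret;
	}
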
> +static inline int __try_xchg_user_32(u32 *oval, u32 __user *uaddr, u32 newval)
> +{
> +	u32 oldval = 0;
> +	int ret = 0;
> +
> +	asm volatile("\n"
> +		"1:\txchgl %0, %2\n"
> +		"2:\n"
> +		"\t.section .fixup, \"ax\"\n"
> +		"3:\tmov %3, %0\n"
> +		"\tjmp 2b\n"
> +		"\t.previous\n"
> +		_ASM_EXTABLE_UA(1b, 3b)
> +		: "=r" (oldval), "=r" (ret), "+m" (*uaddr)
> +		: "i" (-EFAULT), "0" (newval), "1" (0)
> +	);
> +
> +	if (ret)
> +		return ret;
> +
> +	*oval = oldval;
> +	return 0;
> +}
> +
> +static inline int __try_xchg_user_64(u64 *oval, u64 __user *uaddr, u64 newval)
> +{
> +	u64 oldval = 0;
> +	int ret = 0;
> +
> +	asm volatile("\n"
> +		"1:\txchgq %0, %2\n"
> +		"2:\n"
> +		"\t.section .fixup, \"ax\"\n"
> +		"3:\tmov %3, %0\n"
> +		"\tjmp 2b\n"
> +		"\t.previous\n"
> +		_ASM_EXTABLE_UA(1b, 3b)
> +		: "=r" (oldval), "=r" (ret), "+m" (*uaddr)
> +		: "i" (-EFAULT), "0" (newval), "1" (0)
> +	);
> +
> +	if (ret)
> +		return ret;
> +
> +	*oval = oldval;
> +	return 0;
> +}
> +
> +/**
> + * xchg_32_user - atomically exchange 64-bit values
       ^^^^^^^^^^^^

Ditto. (And the summary line says "64-bit" for the 32-bit variant.)

> + * Return:
> + * 0 - OK
> + * -EFAULT: memory access error
> + */
> +static inline int xchg_user_32(u32 __user *uaddr, u32 *val)
> +{
> +	int ret = -EFAULT;
> +
> +	if (unlikely(!access_ok(uaddr, sizeof(*uaddr))))
> +		return -EFAULT;
> +
> +	pagefault_disable();
> +
> +	while (true) {
> +
> +		__uaccess_begin_nospec();
> +		ret = __try_xchg_user_32(val, uaddr, *val);
> +		user_access_end();
> +
> +		if (!ret)
> +			break;
> +
> +		if (fix_pagefault((unsigned long)uaddr, true, sizeof(*uaddr)) < 0)
> +			break;
> +	}
> +
> +	pagefault_enable();
> +
> +	return ret;
> +}
> +
> +/**
> + * xchg_64_user - atomically exchange 64-bit values
       ^^^^^^^^^^^^

Ditto.

> + * Return:
> + * 0 - OK
> + * -EFAULT: memory access error
> + */
> +static inline int xchg_user_64(u64 __user *uaddr, u64 *val)
> +{
> +	int ret = -EFAULT;
> +
> +	if (unlikely(!access_ok(uaddr, sizeof(*uaddr))))
> +		return -EFAULT;
> +
> +	pagefault_disable();
> +
> +	while (true) {
> +
> +		__uaccess_begin_nospec();
> +		ret = __try_xchg_user_64(val, uaddr, *val);
> +		user_access_end();
> +
> +		if (!ret)
> +			break;
> +
> +		if (fix_pagefault((unsigned long)uaddr, true, sizeof(*uaddr)) < 0)
> +			break;
> +	}
> +
> +	pagefault_enable();
> +
> +	return ret;
> +}
> +
> +/**
> + * get_user_nosleep - get user value without sleeping.
> + *
> + * get_user() might sleep and therefore cannot be used in preempt-disabled
> + * regions.
> + */
> +#define get_user_nosleep(out, uaddr)			\
> +({							\
> +	int ret = -EFAULT;				\
> +							\
> +	if (access_ok((uaddr), sizeof(*(uaddr)))) {	\
> +		pagefault_disable();			\
> +							\
> +		if (!__get_user((out), (uaddr)))	\
> +			ret = 0;			\
> +							\
> +		pagefault_enable();			\
> +	}						\
> +	ret;						\
> +})
> +
> +#endif /* CONFIG_X86_64 */
> +#endif /* _KERNEL_SCHED_UMCG_UACCESS_H */
> --
> 2.25.1
>

Thanks,
Tao
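P.S. Same understanding-check for get_user_nosleep(): I read it as
"either the page is resident and we get the value, or we get -EFAULT
and must retry from a sleepable context". A hypothetical caller, with
made-up names, only to illustrate the convention as I understand it:

	static bool peek_user_counter(u32 __user *uaddr, u32 *out)
	{
		u32 val = 0;

		/* Safe in preempt-disabled regions; never sleeps. */
		if (get_user_nosleep(val, uaddr))
			return false;	/* faulted: retry from a sleepable context */

		*out = val;
		return true;
	}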