On Wed, Feb 16, 2022 at 02:13:32PM +0100, Arnd Bergmann wrote: > From: Arnd Bergmann <arnd@xxxxxxxx> > > There are no remaining callers of set_fs(), so CONFIG_SET_FS > can be removed globally, along with the thread_info field and > any references to it. > > This turns access_ok() into a cheaper check against TASK_SIZE_MAX. > > With CONFIG_SET_FS gone, so drop all remaining references to > set_fs()/get_fs(), mm_segment_t and uaccess_kernel(). > > Signed-off-by: Arnd Bergmann <arnd@xxxxxxxx> > --- ... > arch/openrisc/Kconfig | 1 - > arch/openrisc/include/asm/thread_info.h | 7 --- > arch/openrisc/include/asm/uaccess.h | 23 -------- ... > fs/exec.c | 6 -- > include/asm-generic/access_ok.h | 10 +--- > include/asm-generic/uaccess.h | 25 +------- > include/linux/syscalls.h | 4 -- > include/linux/uaccess.h | 33 ----------- > include/rdma/ib.h | 2 +- > kernel/events/callchain.c | 4 -- > kernel/events/core.c | 3 - > kernel/exit.c | 14 ----- > kernel/kthread.c | 5 -- > kernel/stacktrace.c | 3 - > kernel/trace/bpf_trace.c | 4 -- > mm/maccess.c | 11 ---- > mm/memory.c | 8 --- > net/bpfilter/bpfilter_kern.c | 2 +- > 72 files changed, 10 insertions(+), 522 deletions(-) > delete mode 100644 arch/arc/include/asm/segment.h > delete mode 100644 arch/csky/include/asm/segment.h > delete mode 100644 arch/h8300/include/asm/segment.h > > diff --git a/arch/Kconfig b/arch/Kconfig > index fa5db36bda67..99349547afed 100644 > --- a/arch/Kconfig > +++ b/arch/Kconfig > @@ -24,9 +24,6 @@ config KEXEC_ELF > config HAVE_IMA_KEXEC > bool > > -config SET_FS > - bool > - > config HOTPLUG_SMT > bool > ... > diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig > index f724b3f1aeed..0d68adf6e02b 100644 > --- a/arch/openrisc/Kconfig > +++ b/arch/openrisc/Kconfig > @@ -36,7 +36,6 @@ config OPENRISC > select ARCH_WANT_FRAME_POINTERS > select GENERIC_IRQ_MULTI_HANDLER > select MMU_GATHER_NO_RANGE if MMU > - select SET_FS > select TRACE_IRQFLAGS_SUPPORT > > config CPU_BIG_ENDIAN > diff --git a/arch/openrisc/include/asm/thread_info.h b/arch/openrisc/include/asm/thread_info.h > index 659834ab87fa..4af3049c34c2 100644 > --- a/arch/openrisc/include/asm/thread_info.h > +++ b/arch/openrisc/include/asm/thread_info.h > @@ -40,18 +40,12 @@ > */ > #ifndef __ASSEMBLY__ > > -typedef unsigned long mm_segment_t; > - > struct thread_info { > struct task_struct *task; /* main task structure */ > unsigned long flags; /* low level flags */ > __u32 cpu; /* current CPU */ > __s32 preempt_count; /* 0 => preemptable, <0 => BUG */ > > - mm_segment_t addr_limit; /* thread address space: > - 0-0x7FFFFFFF for user-thead > - 0-0xFFFFFFFF for kernel-thread > - */ > __u8 supervisor_stack[0]; > > /* saved context data */ > @@ -71,7 +65,6 @@ struct thread_info { > .flags = 0, \ > .cpu = 0, \ > .preempt_count = INIT_PREEMPT_COUNT, \ > - .addr_limit = KERNEL_DS, \ > .ksp = 0, \ > } > > diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h > index 8f049ec99b3e..d6500a374e18 100644 > --- a/arch/openrisc/include/asm/uaccess.h > +++ b/arch/openrisc/include/asm/uaccess.h > @@ -22,29 +22,6 @@ > #include <linux/string.h> > #include <asm/page.h> > #include <asm/extable.h> > - > -/* > - * The fs value determines whether argument validity checking should be > - * performed or not. If get_fs() == USER_DS, checking is performed, with > - * get_fs() == KERNEL_DS, checking is bypassed. > - * > - * For historical reasons, these macros are grossly misnamed. > - */ > - > -/* addr_limit is the maximum accessible address for the task. we misuse > - * the KERNEL_DS and USER_DS values to both assign and compare the > - * addr_limit values through the equally misnamed get/set_fs macros. > - * (see above) > - */ > - > -#define KERNEL_DS (~0UL) > - > -#define USER_DS (TASK_SIZE) > -#define get_fs() (current_thread_info()->addr_limit) > -#define set_fs(x) (current_thread_info()->addr_limit = (x)) > - > -#define uaccess_kernel() (get_fs() == KERNEL_DS) > - > #include <asm-generic/access_ok.h> > > /* ... > diff --git a/fs/exec.c b/fs/exec.c > index 79f2c9483302..bc68a0c089ac 100644 > --- a/fs/exec.c > +++ b/fs/exec.c > @@ -1303,12 +1303,6 @@ int begin_new_exec(struct linux_binprm * bprm) > if (retval) > goto out_unlock; > > - /* > - * Ensure that the uaccess routines can actually operate on userspace > - * pointers: > - */ > - force_uaccess_begin(); > - > if (me->flags & PF_KTHREAD) > free_kthread_struct(me); > me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | > diff --git a/include/asm-generic/access_ok.h b/include/asm-generic/access_ok.h > index 1aad8964d2ed..88a7cb5d9aad 100644 > --- a/include/asm-generic/access_ok.h > +++ b/include/asm-generic/access_ok.h > @@ -16,16 +16,8 @@ > #define TASK_SIZE_MAX TASK_SIZE > #endif > > -#ifndef uaccess_kernel > -#ifdef CONFIG_SET_FS > -#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) > -#else > -#define uaccess_kernel() (0) > -#endif > -#endif > - > #ifndef user_addr_max > -#define user_addr_max() (uaccess_kernel() ? ~0UL : TASK_SIZE_MAX) > +#define user_addr_max() TASK_SIZE_MAX > #endif > > #ifndef __access_ok > diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h > index ebc685dc8d74..a5be9e61a2a2 100644 > --- a/include/asm-generic/uaccess.h > +++ b/include/asm-generic/uaccess.h > @@ -8,6 +8,7 @@ > * address space, e.g. all NOMMU machines. > */ > #include <linux/string.h> > +#include <asm-generic/access_ok.h> > > #ifdef CONFIG_UACCESS_MEMCPY > #include <asm/unaligned.h> > @@ -94,30 +95,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) > #define INLINE_COPY_TO_USER > #endif /* CONFIG_UACCESS_MEMCPY */ > > -#ifdef CONFIG_SET_FS > -#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) > - > -#ifndef KERNEL_DS > -#define KERNEL_DS MAKE_MM_SEG(~0UL) > -#endif > - > -#ifndef USER_DS > -#define USER_DS MAKE_MM_SEG(TASK_SIZE - 1) > -#endif > - > -#ifndef get_fs > -#define get_fs() (current_thread_info()->addr_limit) > - > -static inline void set_fs(mm_segment_t fs) > -{ > - current_thread_info()->addr_limit = fs; > -} > -#endif > - > -#endif /* CONFIG_SET_FS */ > - > -#include <asm-generic/access_ok.h> > - > /* > * These are the main single-value transfer routines. They automatically > * use the right size if we just have the right pointer type. > diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h > index 819c0cb00b6d..a34b0f9a9972 100644 > --- a/include/linux/syscalls.h > +++ b/include/linux/syscalls.h > @@ -290,10 +290,6 @@ static inline void addr_limit_user_check(void) > return; > #endif > > - if (CHECK_DATA_CORRUPTION(uaccess_kernel(), > - "Invalid address limit on user-mode return")) > - force_sig(SIGKILL); > - > #ifdef TIF_FSCHECK > clear_thread_flag(TIF_FSCHECK); > #endif > diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h > index 2c31667e62e0..2421a41f3a8e 100644 > --- a/include/linux/uaccess.h > +++ b/include/linux/uaccess.h > @@ -10,39 +10,6 @@ > > #include <asm/uaccess.h> > > -#ifdef CONFIG_SET_FS > -/* > - * Force the uaccess routines to be wired up for actual userspace access, > - * overriding any possible set_fs(KERNEL_DS) still lingering around. Undone > - * using force_uaccess_end below. > - */ > -static inline mm_segment_t force_uaccess_begin(void) > -{ > - mm_segment_t fs = get_fs(); > - > - set_fs(USER_DS); > - return fs; > -} > - > -static inline void force_uaccess_end(mm_segment_t oldfs) > -{ > - set_fs(oldfs); > -} > -#else /* CONFIG_SET_FS */ > -typedef struct { > - /* empty dummy */ > -} mm_segment_t; > - > -static inline mm_segment_t force_uaccess_begin(void) > -{ > - return (mm_segment_t) { }; > -} > - > -static inline void force_uaccess_end(mm_segment_t oldfs) > -{ > -} > -#endif /* CONFIG_SET_FS */ > - > /* > * Architectures should provide two primitives (raw_copy_{to,from}_user()) > * and get rid of their private instances of copy_{to,from}_user() and > diff --git a/include/rdma/ib.h b/include/rdma/ib.h > index 83139b9ce409..f7c185ff7a11 100644 > --- a/include/rdma/ib.h > +++ b/include/rdma/ib.h > @@ -75,7 +75,7 @@ struct sockaddr_ib { > */ > static inline bool ib_safe_file_access(struct file *filp) > { > - return filp->f_cred == current_cred() && !uaccess_kernel(); > + return filp->f_cred == current_cred(); > } > > #endif /* _RDMA_IB_H */ > diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c > index 58cbe357fb2b..1273be84392c 100644 > --- a/kernel/events/callchain.c > +++ b/kernel/events/callchain.c > @@ -209,17 +209,13 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, > } > > if (regs) { > - mm_segment_t fs; > - > if (crosstask) > goto exit_put; > > if (add_mark) > perf_callchain_store_context(&ctx, PERF_CONTEXT_USER); > > - fs = force_uaccess_begin(); > perf_callchain_user(&ctx, regs); > - force_uaccess_end(fs); > } > } > > diff --git a/kernel/events/core.c b/kernel/events/core.c > index 57c7197838db..11ca7303d6df 100644 > --- a/kernel/events/core.c > +++ b/kernel/events/core.c > @@ -6746,7 +6746,6 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size, > unsigned long sp; > unsigned int rem; > u64 dyn_size; > - mm_segment_t fs; > > /* > * We dump: > @@ -6764,9 +6763,7 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size, > > /* Data. */ > sp = perf_user_stack_pointer(regs); > - fs = force_uaccess_begin(); > rem = __output_copy_user(handle, (void *) sp, dump_size); > - force_uaccess_end(fs); > dyn_size = dump_size - rem; > > perf_output_skip(handle, rem); > diff --git a/kernel/exit.c b/kernel/exit.c > index b00a25bb4ab9..0884a75bc2f8 100644 > --- a/kernel/exit.c > +++ b/kernel/exit.c > @@ -737,20 +737,6 @@ void __noreturn do_exit(long code) > > WARN_ON(blk_needs_flush_plug(tsk)); > > - /* > - * If do_dead is called because this processes oopsed, it's possible > - * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before > - * continuing. Amongst other possible reasons, this is to prevent > - * mm_release()->clear_child_tid() from writing to a user-controlled > - * kernel address. > - * > - * On uptodate architectures force_uaccess_begin is a noop. On > - * architectures that still have set_fs/get_fs in addition to handling > - * oopses handles kernel threads that run as set_fs(KERNEL_DS) by > - * default. > - */ > - force_uaccess_begin(); > - > kcov_task_exit(tsk); > > coredump_task_exit(tsk); > diff --git a/kernel/kthread.c b/kernel/kthread.c > index 38c6dd822da8..16c2275d4b50 100644 > --- a/kernel/kthread.c > +++ b/kernel/kthread.c > @@ -55,7 +55,6 @@ struct kthread { > int result; > int (*threadfn)(void *); > void *data; > - mm_segment_t oldfs; > struct completion parked; > struct completion exited; > #ifdef CONFIG_BLK_CGROUP > @@ -1441,8 +1440,6 @@ void kthread_use_mm(struct mm_struct *mm) > mmdrop(active_mm); > else > smp_mb(); > - > - to_kthread(tsk)->oldfs = force_uaccess_begin(); > } > EXPORT_SYMBOL_GPL(kthread_use_mm); > > @@ -1457,8 +1454,6 @@ void kthread_unuse_mm(struct mm_struct *mm) > WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); > WARN_ON_ONCE(!tsk->mm); > > - force_uaccess_end(to_kthread(tsk)->oldfs); > - > task_lock(tsk); > /* > * When a kthread stops operating on an address space, the loop > diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c > index 9c625257023d..9ed5ce989415 100644 > --- a/kernel/stacktrace.c > +++ b/kernel/stacktrace.c > @@ -226,15 +226,12 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size) > .store = store, > .size = size, > }; > - mm_segment_t fs; > > /* Trace user stack if not a kernel thread */ > if (current->flags & PF_KTHREAD) > return 0; > > - fs = force_uaccess_begin(); > arch_stack_walk_user(consume_entry, &c, task_pt_regs(current)); > - force_uaccess_end(fs); > > return c.len; > } > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > index 21aa30644219..8115fff17018 100644 > --- a/kernel/trace/bpf_trace.c > +++ b/kernel/trace/bpf_trace.c > @@ -332,8 +332,6 @@ BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src, > if (unlikely(in_interrupt() || > current->flags & (PF_KTHREAD | PF_EXITING))) > return -EPERM; > - if (unlikely(uaccess_kernel())) > - return -EPERM; > if (unlikely(!nmi_uaccess_okay())) > return -EPERM; > > @@ -835,8 +833,6 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type) > */ > if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING))) > return -EPERM; > - if (unlikely(uaccess_kernel())) > - return -EPERM; > if (unlikely(!nmi_uaccess_okay())) > return -EPERM; > > diff --git a/mm/maccess.c b/mm/maccess.c > index cbd1b3959af2..106820b33a2b 100644 > --- a/mm/maccess.c > +++ b/mm/maccess.c > @@ -113,14 +113,11 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) > long copy_from_user_nofault(void *dst, const void __user *src, size_t size) > { > long ret = -EFAULT; > - mm_segment_t old_fs = force_uaccess_begin(); > - > if (access_ok(src, size)) { > pagefault_disable(); > ret = __copy_from_user_inatomic(dst, src, size); > pagefault_enable(); > } > - force_uaccess_end(old_fs); > > if (ret) > return -EFAULT; > @@ -140,14 +137,12 @@ EXPORT_SYMBOL_GPL(copy_from_user_nofault); > long copy_to_user_nofault(void __user *dst, const void *src, size_t size) > { > long ret = -EFAULT; > - mm_segment_t old_fs = force_uaccess_begin(); > > if (access_ok(dst, size)) { > pagefault_disable(); > ret = __copy_to_user_inatomic(dst, src, size); > pagefault_enable(); > } > - force_uaccess_end(old_fs); > > if (ret) > return -EFAULT; > @@ -176,17 +171,14 @@ EXPORT_SYMBOL_GPL(copy_to_user_nofault); > long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, > long count) > { > - mm_segment_t old_fs; > long ret; > > if (unlikely(count <= 0)) > return 0; > > - old_fs = force_uaccess_begin(); > pagefault_disable(); > ret = strncpy_from_user(dst, unsafe_addr, count); > pagefault_enable(); > - force_uaccess_end(old_fs); > > if (ret >= count) { > ret = count; > @@ -216,14 +208,11 @@ long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, > */ > long strnlen_user_nofault(const void __user *unsafe_addr, long count) > { > - mm_segment_t old_fs; > int ret; > > - old_fs = force_uaccess_begin(); > pagefault_disable(); > ret = strnlen_user(unsafe_addr, count); > pagefault_enable(); > - force_uaccess_end(old_fs); > > return ret; > } > diff --git a/mm/memory.c b/mm/memory.c > index c125c4969913..9a6ebf68a846 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -5256,14 +5256,6 @@ void print_vma_addr(char *prefix, unsigned long ip) > #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP) > void __might_fault(const char *file, int line) > { > - /* > - * Some code (nfs/sunrpc) uses socket ops on kernel memory while > - * holding the mmap_lock, this is safe because kernel memory doesn't > - * get paged out, therefore we'll never actually fault, and the > - * below annotations will generate false positives. > - */ > - if (uaccess_kernel()) > - return; > if (pagefault_disabled()) > return; > __might_sleep(file, line); > diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c > index 51a941b56ec3..422ec6e7ccff 100644 Acked-by: Stafford Horne <shorne@xxxxxxxxx> [openrisc, asm-generic] Thanks!