On Fri, Jun 24, 2016 at 2:34 PM, Andy Lutomirski <luto@xxxxxxxxxxxxxx> wrote: >> >> So let me get the pure semantic patches done, and then for 4.8 when we >> do the things that actually change real meaning we'll have a sane >> base. Ok? > > Works for me. I'll see whether my vmap patches still apply and, if > needed, rebase and send a v5. Ok, I'e pushed out the cleanups (and all the pulls that always come in on Friday afternoon - gaah, I shouldn't have tried doing this on a Friday). I'm attaching the current left-over patch that actually changes things. It's obviously a composite, and includes your "remove stack_smp_processor_id()" thing etc, so it's not meant to be used as-is, but it does seem to work. Interestingly, it seems pretty clean too, removing more lines than it adds (despite the fact that it adds a new config option), and generally making things prettier rather than the reverse. That's always a good sign. Linus
This is an attempt at moving the thread_info into the task_struct arch/x86/Kconfig | 1 + arch/x86/entry/common.c | 21 +++++++-------------- arch/x86/entry/entry_64.S | 9 ++++++--- arch/x86/include/asm/cpu.h | 1 - arch/x86/include/asm/smp.h | 6 ------ arch/x86/include/asm/switch_to.h | 6 ++---- arch/x86/include/asm/thread_info.h | 37 +++---------------------------------- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/irq_64.c | 3 +-- arch/x86/kernel/process.c | 6 ++---- arch/x86/um/ptrace_32.c | 8 ++++---- include/linux/init_task.h | 9 +++++++++ include/linux/sched.h | 18 +++++++++++++++--- init/Kconfig | 3 +++ init/init_task.c | 7 +++++-- 15 files changed, 59 insertions(+), 78 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d9a94da0c29f..f33bc80577c5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -154,6 +154,7 @@ config X86 select SPARSE_IRQ select SRCU select SYSCTL_EXCEPTION_TRACE + select THREAD_INFO_IN_TASK select USER_STACKTRACE_SUPPORT select VIRT_TO_BUS select X86_DEV_DMA_OPS if X86_64 diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index ec138e538c44..d5feac5f252d 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -31,13 +31,6 @@ #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> -static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs) -{ - unsigned long top_of_stack = - (unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING; - return (struct thread_info *)(top_of_stack - THREAD_SIZE); -} - #ifdef CONFIG_CONTEXT_TRACKING /* Called on entry from user mode with IRQs off. */ __visible void enter_from_user_mode(void) @@ -78,7 +71,7 @@ static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) */ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) { - struct thread_info *ti = pt_regs_to_thread_info(regs); + struct thread_info *ti = current_thread_info(); unsigned long ret = 0; u32 work; @@ -156,7 +149,7 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, unsigned long phase1_result) { - struct thread_info *ti = pt_regs_to_thread_info(regs); + struct thread_info *ti = current_thread_info(); long ret = 0; u32 work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; @@ -239,7 +232,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) /* Disable IRQs and retry */ local_irq_disable(); - cached_flags = READ_ONCE(pt_regs_to_thread_info(regs)->flags); + cached_flags = READ_ONCE(current_thread_info()->flags); if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) break; @@ -250,7 +243,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) /* Called with IRQs disabled. */ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) { - struct thread_info *ti = pt_regs_to_thread_info(regs); + struct thread_info *ti = current_thread_info(); u32 cached_flags; if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled())) @@ -309,7 +302,7 @@ static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags) */ __visible inline void syscall_return_slowpath(struct pt_regs *regs) { - struct thread_info *ti = pt_regs_to_thread_info(regs); + struct thread_info *ti = current_thread_info(); u32 cached_flags = READ_ONCE(ti->flags); CT_WARN_ON(ct_state() != CONTEXT_KERNEL); @@ -332,7 +325,7 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs) #ifdef CONFIG_X86_64 __visible void do_syscall_64(struct pt_regs *regs) { - struct thread_info *ti = pt_regs_to_thread_info(regs); + struct thread_info *ti = current_thread_info(); unsigned long nr = regs->orig_ax; enter_from_user_mode(); @@ -365,7 +358,7 @@ __visible void do_syscall_64(struct pt_regs *regs) */ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) { - struct thread_info *ti = pt_regs_to_thread_info(regs); + struct thread_info *ti = current_thread_info(); unsigned int nr = (unsigned int)regs->orig_ax; #ifdef CONFIG_IA32_EMULATION diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9ee0da1807ed..f49742de2c65 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -179,7 +179,8 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) * If we need to do entry work or if we guess we'll need to do * exit work, go straight to the slow path. */ - testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + GET_THREAD_INFO(%r11) + testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TI_flags(%r11) jnz entry_SYSCALL64_slow_path entry_SYSCALL_64_fastpath: @@ -217,7 +218,8 @@ entry_SYSCALL_64_fastpath: */ DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + GET_THREAD_INFO(%r11) + testl $_TIF_ALLWORK_MASK, TI_flags(%r11) jnz 1f LOCKDEP_SYS_EXIT @@ -368,9 +370,10 @@ END(ptregs_\func) * A newly forked process directly context switches into this address. * * rdi: prev task we switched from + * rsi: task we're switching to */ ENTRY(ret_from_fork) - LOCK ; btr $TIF_FORK, TI_flags(%r8) + LOCK ; btr $TIF_FORK, TI_flags(%rsi) /* rsi: this newly forked task */ call schedule_tail /* rdi: 'prev' task parameter */ diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 678637ad7476..59d34c521d96 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -17,7 +17,6 @@ static inline void prefill_possible_map(void) {} #define cpu_physical_id(cpu) boot_cpu_physical_apicid #define safe_smp_processor_id() 0 -#define stack_smp_processor_id() 0 #endif /* CONFIG_SMP */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 66b057306f40..0576b6157f3a 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -172,12 +172,6 @@ extern int safe_smp_processor_id(void); #elif defined(CONFIG_X86_64_SMP) #define raw_smp_processor_id() (this_cpu_read(cpu_number)) -#define stack_smp_processor_id() \ -({ \ - struct thread_info *ti; \ - __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \ - ti->cpu; \ -}) #define safe_smp_processor_id() smp_processor_id() #endif diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 8f321a1b03a1..ae0aa0612c67 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -110,18 +110,16 @@ do { \ "call __switch_to\n\t" \ "movq "__percpu_arg([current_task])",%%rsi\n\t" \ __switch_canary \ - "movq %P[thread_info](%%rsi),%%r8\n\t" \ "movq %%rax,%%rdi\n\t" \ - "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ + "testl %[_tif_fork],%P[ti_flags](%%rsi)\n\t" \ "jnz ret_from_fork\n\t" \ RESTORE_CONTEXT \ : "=a" (last) \ __switch_canary_oparam \ : [next] "S" (next), [prev] "D" (prev), \ [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ - [ti_flags] "i" (offsetof(struct thread_info, flags)), \ + [ti_flags] "i" (offsetof(struct task_struct, thread_info.flags)), \ [_tif_fork] "i" (_TIF_FORK), \ - [thread_info] "i" (offsetof(struct task_struct, stack)), \ [current_task] "m" (current_task) \ __switch_canary_iparam \ : "memory", "cc" __EXTRA_CLOBBER) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 30c133ac05cd..d38ebb08f4c1 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -53,24 +53,21 @@ struct task_struct; #include <linux/atomic.h> struct thread_info { - struct task_struct *task; /* main task structure */ __u32 flags; /* low level flags */ __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ - mm_segment_t addr_limit; unsigned int sig_on_uaccess_error:1; unsigned int uaccess_err:1; /* uaccess failed */ + mm_segment_t addr_limit; }; #define INIT_THREAD_INFO(tsk) \ { \ - .task = &tsk, \ .flags = 0, \ .cpu = 0, \ .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) #else /* !__ASSEMBLY__ */ @@ -166,7 +163,7 @@ struct thread_info { static inline struct thread_info *current_thread_info(void) { - return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE); + return (struct thread_info *)current; } static inline unsigned long current_stack_pointer(void) @@ -188,35 +185,7 @@ static inline unsigned long current_stack_pointer(void) /* Load thread_info address into "reg" */ #define GET_THREAD_INFO(reg) \ - _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ - _ASM_SUB $(THREAD_SIZE),reg ; - -/* - * ASM operand which evaluates to a 'thread_info' address of - * the current task, if it is known that "reg" is exactly "off" - * bytes below the top of the stack currently. - * - * ( The kernel stack's size is known at build time, it is usually - * 2 or 4 pages, and the bottom of the kernel stack contains - * the thread_info structure. So to access the thread_info very - * quickly from assembly code we can calculate down from the - * top of the kernel stack to the bottom, using constant, - * build-time calculations only. ) - * - * For example, to fetch the current thread_info->flags value into %eax - * on x86-64 defconfig kernels, in syscall entry code where RSP is - * currently at exactly SIZEOF_PTREGS bytes away from the top of the - * stack: - * - * mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax - * - * will translate to: - * - * 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax - * - * which is below the current RSP by almost 16K. - */ -#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg) + _ASM_MOV PER_CPU_VAR(current_task),reg #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0fe6953f421c..d22a7b9c4f0e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1452,7 +1452,7 @@ void cpu_init(void) struct task_struct *me; struct tss_struct *t; unsigned long v; - int cpu = stack_smp_processor_id(); + int cpu = raw_smp_processor_id(); int i; wait_for_master_cpu(cpu); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 206d0b90a3ab..38f9f5678dc8 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -41,8 +41,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) if (user_mode(regs)) return; - if (regs->sp >= curbase + sizeof(struct thread_info) + - sizeof(struct pt_regs) + STACK_TOP_MARGIN && + if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN && regs->sp <= curbase + THREAD_SIZE) return; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 96becbbb52e0..8f60f810a9e7 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -536,9 +536,7 @@ unsigned long get_wchan(struct task_struct *p) * PADDING * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING * stack - * ----------- bottom = start + sizeof(thread_info) - * thread_info - * ----------- start + * ----------- bottom = start * * The tasks stack pointer points at the location where the * framepointer is stored. The data on the stack is: @@ -549,7 +547,7 @@ unsigned long get_wchan(struct task_struct *p) */ top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; top -= 2 * sizeof(unsigned long); - bottom = start + sizeof(struct thread_info); + bottom = start; sp = READ_ONCE(p->thread.sp); if (sp < bottom || sp > top) diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c index ebd4dd6ef73b..14e8f6a628c2 100644 --- a/arch/x86/um/ptrace_32.c +++ b/arch/x86/um/ptrace_32.c @@ -191,7 +191,7 @@ int peek_user(struct task_struct *child, long addr, long data) static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) { - int err, n, cpu = ((struct thread_info *) child->stack)->cpu; + int err, n, cpu = task_thread_info(child)->cpu; struct user_i387_struct fpregs; err = save_i387_registers(userspace_pid[cpu], @@ -208,7 +208,7 @@ static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *c static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) { - int n, cpu = ((struct thread_info *) child->stack)->cpu; + int n, cpu = task_thread_info(child)->cpu; struct user_i387_struct fpregs; n = copy_from_user(&fpregs, buf, sizeof(fpregs)); @@ -221,7 +221,7 @@ static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *c static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) { - int err, n, cpu = ((struct thread_info *) child->stack)->cpu; + int err, n, cpu = task_thread_info(child)->cpu; struct user_fxsr_struct fpregs; err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs); @@ -237,7 +237,7 @@ static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct * static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) { - int n, cpu = ((struct thread_info *) child->stack)->cpu; + int n, cpu = task_thread_info(child)->cpu; struct user_fxsr_struct fpregs; n = copy_from_user(&fpregs, buf, sizeof(fpregs)); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index f8834f820ec2..9c04d44eeb3c 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -15,6 +15,8 @@ #include <net/net_namespace.h> #include <linux/sched/rt.h> +#include <asm/thread_info.h> + #ifdef CONFIG_SMP # define INIT_PUSHABLE_TASKS(tsk) \ .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), @@ -183,12 +185,19 @@ extern struct task_group root_task_group; # define INIT_KASAN(tsk) #endif +#ifdef CONFIG_THREAD_INFO_IN_TASK +# define INIT_TASK_TI(tsk) .thread_info = INIT_THREAD_INFO(tsk), +#else +# define INIT_TASK_TI(tsk) +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) */ #define INIT_TASK(tsk) \ { \ + INIT_TASK_TI(tsk) \ .state = 0, \ .stack = init_stack, \ .usage = ATOMIC_INIT(2), \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 253538f29ade..e34cf214acfe 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1456,8 +1456,11 @@ struct tlbflush_unmap_batch { }; struct task_struct { +#ifdef CONFIG_THREAD_INFO_IN_TASK + struct thread_info thread_info; +#endif volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ - void *stack; + unsigned long *stack; atomic_t usage; unsigned int flags; /* per process flags, defined below */ unsigned int ptrace; @@ -2539,7 +2542,9 @@ extern void set_curr_task(int cpu, struct task_struct *p); void yield(void); union thread_union { +#ifndef CONFIG_THREAD_INFO_IN_TASK struct thread_info thread_info; +#endif unsigned long stack[THREAD_SIZE/sizeof(long)]; }; @@ -2967,10 +2972,17 @@ static inline void threadgroup_change_end(struct task_struct *tsk) cgroup_threadgroup_change_end(tsk); } -#ifndef __HAVE_THREAD_FUNCTIONS +#ifdef CONFIG_THREAD_INFO_IN_TASK + +#define task_thread_info(task) (&(task)->thread_info) +#define task_stack_page(task) ((void *)(task)->stack) +#define setup_thread_stack(new,old) do { } while(0) +#define end_of_stack(task) ((task)->stack) + +#elif !defined(__HAVE_THREAD_FUNCTIONS) #define task_thread_info(task) ((struct thread_info *)(task)->stack) -#define task_stack_page(task) ((task)->stack) +#define task_stack_page(task) ((void *)(task)->stack) static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) { diff --git a/init/Kconfig b/init/Kconfig index f755a602d4a1..0c83af6d3753 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -26,6 +26,9 @@ config IRQ_WORK config BUILDTIME_EXTABLE_SORT bool +config THREAD_INFO_IN_TASK + bool + menu "General setup" config BROKEN diff --git a/init/init_task.c b/init/init_task.c index ba0a7f362d9e..11f83be1fa79 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -22,5 +22,8 @@ EXPORT_SYMBOL(init_task); * Initial thread structure. Alignment of this is handled by a special * linker map entry. */ -union thread_union init_thread_union __init_task_data = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = { +#ifndef CONFIG_THREAD_INFO_IN_TASK + INIT_THREAD_INFO(init_task) +#endif +};