In order to allow inlining the generic entry C functions, move them to include/linux/entry-common.h. Signed-off-by: Sven Schnelle <svens@xxxxxxxxxxxxx> --- include/linux/entry-common.h | 182 ++++++++++++++++++++++++++++++++++- kernel/entry/common.c | 168 -------------------------------- 2 files changed, 179 insertions(+), 171 deletions(-) diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index d95ab85f96ba..b409fbcbd3ac 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -7,8 +7,13 @@ #include <linux/syscalls.h> #include <linux/seccomp.h> #include <linux/sched.h> - +#include <linux/livepatch.h> #include <asm/entry-common.h> +#include <linux/context_tracking.h> +#include <linux/resume_user_mode.h> +#include <linux/tick.h> + +#include <trace/events/syscalls.h> /* * Define dummy _TIF work flags if not defined by the architecture or for @@ -291,7 +296,7 @@ void exit_to_user_mode(void); * make the final state transitions. Interrupts must stay disabled between * return from this function and the invocation of exit_to_user_mode(). */ -void syscall_exit_to_user_mode_work(struct pt_regs *regs); +static void syscall_exit_to_user_mode_work(struct pt_regs *regs); /** * syscall_exit_to_user_mode - Handle work before returning to user mode @@ -350,7 +355,7 @@ void irqentry_enter_from_user_mode(struct pt_regs *regs); * Interrupt exit is not invoking #1 which is the syscall specific one time * work. */ -void irqentry_exit_to_user_mode(struct pt_regs *regs); +static void irqentry_exit_to_user_mode(struct pt_regs *regs); #ifndef irqentry_state /** @@ -465,4 +470,175 @@ irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs); */ void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state); +static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, + unsigned long ti_work) +{ + /* + * Before returning to user space ensure that all pending work + * items have been completed. + */ + while (ti_work & EXIT_TO_USER_MODE_WORK) { + + local_irq_enable_exit_to_user(ti_work); + + if (ti_work & _TIF_NEED_RESCHED) + schedule(); + + if (ti_work & _TIF_UPROBE) + uprobe_notify_resume(regs); + + if (ti_work & _TIF_PATCH_PENDING) + klp_update_patch_state(current); + + if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) + arch_do_signal_or_restart(regs); + + if (ti_work & _TIF_NOTIFY_RESUME) + resume_user_mode_work(regs); + + /* Architecture specific TIF work */ + arch_exit_to_user_mode_work(regs, ti_work); + + /* + * Disable interrupts and reevaluate the work flags as they + * might have changed while interrupts and preemption was + * enabled above. + */ + local_irq_disable_exit_to_user(); + + /* Check if any of the above work has queued a deferred wakeup */ + tick_nohz_user_enter_prepare(); + + ti_work = read_thread_flags(); + } + + /* Return the latest work state for arch_exit_to_user_mode() */ + return ti_work; +} + + +static void exit_to_user_mode_prepare(struct pt_regs *regs) +{ + unsigned long ti_work; + + lockdep_assert_irqs_disabled(); + + /* Flush pending rcuog wakeup before the last need_resched() check */ + tick_nohz_user_enter_prepare(); + + ti_work = read_thread_flags(); + if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK)) + ti_work = exit_to_user_mode_loop(regs, ti_work); + + arch_exit_to_user_mode_prepare(regs, ti_work); + + /* Ensure that the address limit is intact and no locks are held */ + addr_limit_user_check(); + kmap_assert_nomap(); + lockdep_assert_irqs_disabled(); + lockdep_sys_exit(); +} + +/* + * If SYSCALL_EMU is set, then the only reason to report is when + * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall + * instruction has been already reported in syscall_enter_from_user_mode(). + */ +static inline bool report_single_step(unsigned long work) +{ + if (work & SYSCALL_WORK_SYSCALL_EMU) + return false; + + return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP; +} + +static void syscall_exit_work(struct pt_regs *regs, unsigned long work) +{ + bool step; + + /* + * If the syscall was rolled back due to syscall user dispatching, + * then the tracers below are not invoked for the same reason as + * the entry side was not invoked in syscall_trace_enter(): The ABI + * of these syscalls is unknown. + */ + if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { + if (unlikely(current->syscall_dispatch.on_dispatch)) { + current->syscall_dispatch.on_dispatch = false; + return; + } + } + + audit_syscall_exit(regs); + + if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT) + trace_sys_exit(regs, syscall_get_return_value(current, regs)); + + step = report_single_step(work); + if (step || work & SYSCALL_WORK_SYSCALL_TRACE) + ptrace_report_syscall_exit(regs, step); +} + +/* + * Syscall specific exit to user mode preparation. Runs with interrupts + * enabled. + */ +static __always_inline void syscall_exit_to_user_mode_prepare(struct pt_regs *regs) +{ + unsigned long work = READ_ONCE(current_thread_info()->syscall_work); + unsigned long nr = syscall_get_nr(current, regs); + + CT_WARN_ON(ct_state() != CONTEXT_KERNEL); + + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr)) + local_irq_enable(); + } + + rseq_syscall(regs); + + /* + * Do one-time syscall specific work. If these work items are + * enabled, we want to run them exactly once per syscall exit with + * interrupts enabled. + */ + if (unlikely(work & SYSCALL_WORK_EXIT)) + syscall_exit_work(regs, work); +} + +static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs) +{ + syscall_exit_to_user_mode_prepare(regs); + local_irq_disable_exit_to_user(); + exit_to_user_mode_prepare(regs); +} + +static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs) +{ + __syscall_exit_to_user_mode_work(regs); +} + +/* See comment for exit_to_user_mode() in entry-common.h */ +static __always_inline void __exit_to_user_mode(void) +{ + instrumentation_begin(); + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(); + instrumentation_end(); + + user_enter_irqoff(); + arch_exit_to_user_mode(); + lockdep_hardirqs_on(CALLER_ADDR0); +} + + +static __always_inline void irqentry_exit_to_user_mode(struct pt_regs *regs) +{ + instrumentation_begin(); + exit_to_user_mode_prepare(regs); + instrumentation_end(); + __exit_to_user_mode(); +} + + #endif diff --git a/kernel/entry/common.c b/kernel/entry/common.c index be61332c66b5..66af971c3fe4 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -123,19 +123,6 @@ noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs) instrumentation_end(); } -/* See comment for exit_to_user_mode() in entry-common.h */ -static __always_inline void __exit_to_user_mode(void) -{ - instrumentation_begin(); - trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(); - instrumentation_end(); - - user_enter_irqoff(); - arch_exit_to_user_mode(); - lockdep_hardirqs_on(CALLER_ADDR0); -} - void noinstr exit_to_user_mode(void) { __exit_to_user_mode(); @@ -144,153 +131,6 @@ void noinstr exit_to_user_mode(void) /* Workaround to allow gradual conversion of architecture code */ void __weak arch_do_signal_or_restart(struct pt_regs *regs) { } -static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, - unsigned long ti_work) -{ - /* - * Before returning to user space ensure that all pending work - * items have been completed. - */ - while (ti_work & EXIT_TO_USER_MODE_WORK) { - - local_irq_enable_exit_to_user(ti_work); - - if (ti_work & _TIF_NEED_RESCHED) - schedule(); - - if (ti_work & _TIF_UPROBE) - uprobe_notify_resume(regs); - - if (ti_work & _TIF_PATCH_PENDING) - klp_update_patch_state(current); - - if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) - arch_do_signal_or_restart(regs); - - if (ti_work & _TIF_NOTIFY_RESUME) - resume_user_mode_work(regs); - - /* Architecture specific TIF work */ - arch_exit_to_user_mode_work(regs, ti_work); - - /* - * Disable interrupts and reevaluate the work flags as they - * might have changed while interrupts and preemption was - * enabled above. - */ - local_irq_disable_exit_to_user(); - - /* Check if any of the above work has queued a deferred wakeup */ - tick_nohz_user_enter_prepare(); - - ti_work = read_thread_flags(); - } - - /* Return the latest work state for arch_exit_to_user_mode() */ - return ti_work; -} - -static void exit_to_user_mode_prepare(struct pt_regs *regs) -{ - unsigned long ti_work; - - lockdep_assert_irqs_disabled(); - - /* Flush pending rcuog wakeup before the last need_resched() check */ - tick_nohz_user_enter_prepare(); - - ti_work = read_thread_flags(); - if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK)) - ti_work = exit_to_user_mode_loop(regs, ti_work); - - arch_exit_to_user_mode_prepare(regs, ti_work); - - /* Ensure that the address limit is intact and no locks are held */ - addr_limit_user_check(); - kmap_assert_nomap(); - lockdep_assert_irqs_disabled(); - lockdep_sys_exit(); -} - -/* - * If SYSCALL_EMU is set, then the only reason to report is when - * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall - * instruction has been already reported in syscall_enter_from_user_mode(). - */ -static inline bool report_single_step(unsigned long work) -{ - if (work & SYSCALL_WORK_SYSCALL_EMU) - return false; - - return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP; -} - -static void syscall_exit_work(struct pt_regs *regs, unsigned long work) -{ - bool step; - - /* - * If the syscall was rolled back due to syscall user dispatching, - * then the tracers below are not invoked for the same reason as - * the entry side was not invoked in syscall_trace_enter(): The ABI - * of these syscalls is unknown. - */ - if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { - if (unlikely(current->syscall_dispatch.on_dispatch)) { - current->syscall_dispatch.on_dispatch = false; - return; - } - } - - audit_syscall_exit(regs); - - if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT) - trace_sys_exit(regs, syscall_get_return_value(current, regs)); - - step = report_single_step(work); - if (step || work & SYSCALL_WORK_SYSCALL_TRACE) - ptrace_report_syscall_exit(regs, step); -} - -/* - * Syscall specific exit to user mode preparation. Runs with interrupts - * enabled. - */ -static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs) -{ - unsigned long work = READ_ONCE(current_thread_info()->syscall_work); - unsigned long nr = syscall_get_nr(current, regs); - - CT_WARN_ON(ct_state() != CONTEXT_KERNEL); - - if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { - if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr)) - local_irq_enable(); - } - - rseq_syscall(regs); - - /* - * Do one-time syscall specific work. If these work items are - * enabled, we want to run them exactly once per syscall exit with - * interrupts enabled. - */ - if (unlikely(work & SYSCALL_WORK_EXIT)) - syscall_exit_work(regs, work); -} - -static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs) -{ - syscall_exit_to_user_mode_prepare(regs); - local_irq_disable_exit_to_user(); - exit_to_user_mode_prepare(regs); -} - -void syscall_exit_to_user_mode_work(struct pt_regs *regs) -{ - __syscall_exit_to_user_mode_work(regs); -} - __visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs) { instrumentation_begin(); @@ -304,14 +144,6 @@ noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs) __enter_from_user_mode(regs); } -noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs) -{ - instrumentation_begin(); - exit_to_user_mode_prepare(regs); - instrumentation_end(); - __exit_to_user_mode(); -} - noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs) { irqentry_state_t ret = { -- 2.39.2