In prepare_exit_to_usermode(), run cleanup for tasks that exited from isolation and call task_isolation_start() for tasks that entered TIF_TASK_ISOLATION. In syscall_trace_enter(), add the necessary support for reporting syscalls for task-isolation processes. Add task_isolation_interrupt() calls for the kernel exception types that do not result in signals, namely non-signalling page faults, and a task_isolation_remote() call when a reschedule IPI is sent to another CPU. Add task_isolation_kernel_enter() calls to interrupt and syscall entry handlers. This mechanism relies on calls to functions that call task_isolation_kernel_enter() early after entry into the kernel. Those functions are: enter_from_user_mode(), called from do_syscall_64(), do_int80_syscall_32(), do_fast_syscall_32(), idtentry_enter_user(), idtentry_enter_cond_rcu(); idtentry_enter_cond_rcu(), called from non-raw IDT macros and other entry points; idtentry_enter_user(); nmi_enter(); xen_call_function_interrupt(); xen_call_function_single_interrupt(); xen_irq_work_interrupt(). Signed-off-by: Chris Metcalf <cmetcalf@xxxxxxxxxxxx> [abelits@xxxxxxxxxxx: adapted for kernel 5.8] Signed-off-by: Alex Belits <abelits@xxxxxxxxxxx> --- arch/x86/Kconfig | 1 + arch/x86/entry/common.c | 20 +++++++++++++++++++- arch/x86/include/asm/barrier.h | 2 ++ arch/x86/include/asm/thread_info.h | 4 +++- arch/x86/kernel/apic/ipi.c | 2 ++ arch/x86/mm/fault.c | 4 ++++ arch/x86/xen/smp.c | 3 +++ arch/x86/xen/smp_pv.c | 2 ++ 8 files changed, 36 insertions(+), 2 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 883da0abf779..3a80142f85c8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -149,6 +149,7 @@ config X86 select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_TASK_ISOLATION select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_STACKLEAK select HAVE_ARCH_TRACEHOOK diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index f09288431f28..ab94d90a2bd5 100644 --- a/arch/x86/entry/common.c +++ 
b/arch/x86/entry/common.c @@ -26,6 +26,7 @@ #include <linux/livepatch.h> #include <linux/syscalls.h> #include <linux/uaccess.h> +#include <linux/isolation.h> #ifdef CONFIG_XEN_PV #include <xen/xen-ops.h> @@ -86,6 +87,7 @@ static noinstr void enter_from_user_mode(void) { enum ctx_state state = ct_state(); + task_isolation_kernel_enter(); lockdep_hardirqs_off(CALLER_ADDR0); user_exit_irqoff(); @@ -97,6 +99,7 @@ static noinstr void enter_from_user_mode(void) #else static __always_inline void enter_from_user_mode(void) { + task_isolation_kernel_enter(); lockdep_hardirqs_off(CALLER_ADDR0); instrumentation_begin(); trace_hardirqs_off_finish(); @@ -161,6 +164,15 @@ static long syscall_trace_enter(struct pt_regs *regs) return -1L; } + /* + * In task isolation mode, we may prevent the syscall from + * running, and if so we also deliver a signal to the process. + */ + if (work & _TIF_TASK_ISOLATION) { + if (task_isolation_syscall(regs->orig_ax) == -1) + return -1L; + work &= ~_TIF_TASK_ISOLATION; + } #ifdef CONFIG_SECCOMP /* * Do seccomp after ptrace, to catch any tracer changes. @@ -263,6 +275,8 @@ static void __prepare_exit_to_usermode(struct pt_regs *regs) lockdep_assert_irqs_disabled(); lockdep_sys_exit(); + task_isolation_check_run_cleanup(); + cached_flags = READ_ONCE(ti->flags); if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) @@ -278,6 +292,9 @@ static void __prepare_exit_to_usermode(struct pt_regs *regs) if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD)) switch_fpu_return(); + if (cached_flags & _TIF_TASK_ISOLATION) + task_isolation_start(); + #ifdef CONFIG_COMPAT /* * Compat syscalls set TS_COMPAT. 
Make sure we clear it before @@ -597,7 +614,8 @@ bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs) check_user_regs(regs); enter_from_user_mode(); return false; - } + } else + task_isolation_kernel_enter(); /* * If this entry hit the idle task invoke rcu_irq_enter() whether diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 7f828fe49797..5be6ca0519fc 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -4,6 +4,7 @@ #include <asm/alternative.h> #include <asm/nops.h> +#include <asm/processor.h> /* * Force strict CPU ordering. @@ -53,6 +54,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, #define dma_rmb() barrier() #define dma_wmb() barrier() +#define instr_sync() sync_core() #ifdef CONFIG_X86_32 #define __smp_mb() asm volatile("lock; addl $0,-4(%%esp)" ::: "memory", "cc") diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8de8ceccb8bc..6dd1a5cc286d 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -93,6 +93,7 @@ struct thread_info { #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_SLD 18 /* Restore split lock detection on context switch */ +#define TIF_TASK_ISOLATION 19 /* task isolation enabled for task */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ @@ -123,6 +124,7 @@ struct thread_info { #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_SLD (1 << TIF_SLD) +#define _TIF_TASK_ISOLATION (1 << TIF_TASK_ISOLATION) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) @@ -136,7 +138,7 @@ struct thread_info { /* Work to do before invoking the actual syscall. 
*/ #define _TIF_WORK_SYSCALL_ENTRY \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT) + _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | _TIF_TASK_ISOLATION) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW_BASE \ diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 6ca0f91372fd..b4dfaad6a440 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -2,6 +2,7 @@ #include <linux/cpumask.h> #include <linux/smp.h> +#include <linux/isolation.h> #include "local.h" @@ -67,6 +68,7 @@ void native_smp_send_reschedule(int cpu) WARN(1, "sched: Unexpected reschedule of offline CPU#%d!\n", cpu); return; } + task_isolation_remote(cpu, "reschedule IPI"); apic->send_IPI(cpu, RESCHEDULE_VECTOR); } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 1ead568c0101..e16a4f5c7e57 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -18,6 +18,7 @@ #include <linux/uaccess.h> /* faulthandler_disabled() */ #include <linux/efi.h> /* efi_recover_from_page_fault()*/ #include <linux/mm_types.h> +#include <linux/isolation.h> /* task_isolation_interrupt */ #include <asm/cpufeature.h> /* boot_cpu_has, ... */ #include <asm/traps.h> /* dotraplinkage, ... */ @@ -1332,6 +1333,9 @@ void do_user_addr_fault(struct pt_regs *regs, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } + /* No signal was generated, but notify task-isolation tasks. 
*/ + task_isolation_interrupt("page fault at %#lx", address); + check_v8086_mode(regs, address, tsk); } NOKPROBE_SYMBOL(do_user_addr_fault); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 2097fa0ebdb5..9a3a9bae7d06 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -4,6 +4,7 @@ #include <linux/slab.h> #include <linux/cpumask.h> #include <linux/percpu.h> +#include <linux/isolation.h> #include <xen/events.h> @@ -265,6 +266,7 @@ void xen_send_IPI_allbutself(int vector) static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) { + task_isolation_kernel_enter(); irq_enter(); generic_smp_call_function_interrupt(); inc_irq_stat(irq_call_count); @@ -275,6 +277,7 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) { + task_isolation_kernel_enter(); irq_enter(); generic_smp_call_function_single_interrupt(); inc_irq_stat(irq_call_count); diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 171aff1b11f2..d71d3cc36c51 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -24,6 +24,7 @@ #include <linux/cpuhotplug.h> #include <linux/stackprotector.h> #include <linux/pgtable.h> +#include <linux/isolation.h> #include <asm/paravirt.h> #include <asm/idtentry.h> @@ -482,6 +483,7 @@ static void xen_pv_stop_other_cpus(int wait) static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id) { + task_isolation_kernel_enter(); irq_enter(); irq_work_run(); inc_irq_stat(apic_irq_work_irqs); -- 2.26.2