This commit adds task isolation hooks as follows:

- __handle_domain_irq() and handle_domain_nmi() generate an
  isolation warning for the local task

- irq_work_queue_on() generates an isolation warning for the remote
  task being interrupted for irq_work (through
  __smp_call_single_queue())

- generic_exec_single() generates a remote isolation warning for
  the remote cpu being IPI'd (through __smp_call_single_queue())

- smp_call_function_many() generates a remote isolation warning for
  the set of remote cpus being IPI'd (through
  smp_call_function_many_cond())

- on_each_cpu_cond_mask() generates a remote isolation warning for
  the set of remote cpus being IPI'd (through
  smp_call_function_many_cond())

- __ttwu_queue_wakelist() generates a remote isolation warning for
  the remote cpu being IPI'd (through __smp_call_single_queue())

- nmi_enter(), __context_tracking_exit(), __handle_domain_irq(),
  handle_domain_nmi() and scheduler_ipi() clear low-level flags and
  synchronize CPUs by calling task_isolation_kernel_enter()

Calls to task_isolation_remote() or task_isolation_interrupt() can
be placed in platform-independent code, as is done here, when doing
so results in fewer changed lines; this is true, for example, of
the users of the arch_send_call_function_*() APIs. Alternatively,
they can be placed in per-architecture code when there are many
callers, as is true of the smp_send_reschedule() call.

A further cleanup would be to create an intermediate layer, so that
for example smp_send_reschedule() becomes a single generic function
that just calls arch_smp_send_reschedule(); generic code would then
run every time smp_send_reschedule() is invoked (see the sketch
below). For now, we update either callers or callees, whichever
makes more sense.

Calls to task_isolation_kernel_enter() are intended for early
kernel entry code and may be placed in platform-independent or
platform-specific code. It may be possible to clean up low-level
entry code and organize the calls to task_isolation_kernel_enter()
so that multiple per-architecture or driver-specific call sites are
avoided. RCU initialization may be a good reference point for
locating those places in the kernel (task_isolation_kernel_enter()
should precede it); however, it is currently not unified across
architectures.
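For illustration only, such an intermediate layer might look like
the hypothetical sketch below; arch_smp_send_reschedule() is an
assumed per-architecture hook that does not exist today, and the
exact hook placement is only a suggestion, not part of this patch:

	/* kernel/smp.c: hypothetical single generic entry point */
	void smp_send_reschedule(int cpu)
	{
		/* generic isolation hook now runs for every caller */
		task_isolation_remote(cpu, "reschedule IPI");
		/* assumed arch-specific IPI implementation */
		arch_smp_send_reschedule(cpu);
	}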
Signed-off-by: Chris Metcalf <cmetcalf@xxxxxxxxxxxx>
[abelits@xxxxxxxxxxx: adapted for kernel 5.8, added low-level flags handling]
Signed-off-by: Alex Belits <abelits@xxxxxxxxxxx>
---
 include/linux/hardirq.h   |  2 ++
 include/linux/sched.h     |  2 ++
 kernel/context_tracking.c |  4 ++++
 kernel/irq/irqdesc.c      | 13 +++++++++++++
 kernel/smp.c              |  6 +++++-
 5 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 03c9fece7d43..5aab1d0a580e 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -7,6 +7,7 @@
 #include <linux/lockdep.h>
 #include <linux/ftrace_irq.h>
 #include <linux/vtime.h>
+#include <linux/isolation.h>
 #include <asm/hardirq.h>
 
 extern void synchronize_irq(unsigned int irq);
@@ -114,6 +115,7 @@ extern void rcu_nmi_exit(void);
 #define nmi_enter()						\
 	do {							\
 		arch_nmi_enter();				\
+		task_isolation_kernel_enter();			\
 		printk_nmi_enter();				\
 		lockdep_off();					\
 		BUG_ON(in_nmi() == NMI_MASK);			\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7fb7bb3fddaa..cacfa415dc59 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -32,6 +32,7 @@
 #include <linux/posix-timers.h>
 #include <linux/rseq.h>
 #include <linux/kcsan.h>
+#include <linux/isolation.h>
 
 /* task_struct member predeclarations (sorted alphabetically): */
 struct audit_context;
@@ -1743,6 +1744,7 @@ extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);
 #ifdef CONFIG_SMP
 static __always_inline void scheduler_ipi(void)
 {
+	task_isolation_kernel_enter();
 	/*
 	 * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
 	 * TIF_NEED_RESCHED remotely (for the first time) will also send
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 36a98c48aedc..481a722ddbce 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -21,6 +21,7 @@
 #include <linux/hardirq.h>
 #include <linux/export.h>
 #include <linux/kprobes.h>
+#include <linux/isolation.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/context_tracking.h>
@@ -148,6 +149,8 @@ void noinstr __context_tracking_exit(enum ctx_state state)
 	if (!context_tracking_recursion_enter())
 		return;
 
+	task_isolation_kernel_enter();
+
 	if (__this_cpu_read(context_tracking.state) == state) {
 		if (__this_cpu_read(context_tracking.active)) {
 			/*
@@ -159,6 +162,7 @@ void noinstr __context_tracking_exit(enum ctx_state state)
 			instrumentation_begin();
 			vtime_user_exit(current);
 			trace_user_exit(0);
+			task_isolation_user_exit();
 			instrumentation_end();
 		}
 	}
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 1a7723604399..b351aac7732f 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -16,6 +16,7 @@
 #include <linux/bitmap.h>
 #include <linux/irqdomain.h>
 #include <linux/sysfs.h>
+#include <linux/isolation.h>
 
 #include "internals.h"
 
@@ -669,6 +670,8 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
 	unsigned int irq = hwirq;
 	int ret = 0;
 
+	task_isolation_kernel_enter();
+
 	irq_enter();
 
 #ifdef CONFIG_IRQ_DOMAIN
@@ -676,6 +679,10 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
 		irq = irq_find_mapping(domain, hwirq);
 #endif
 
+	task_isolation_interrupt((irq == hwirq) ?
+				 "irq %d (%s)" : "irq %d (%s hwirq %d)",
+				 irq, domain ? domain->name : "", hwirq);
+
 	/*
 	 * Some hardware gives randomly wrong interrupts. Rather
 	 * than crashing, do something sensible.
@@ -710,6 +717,8 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
 	unsigned int irq;
 	int ret = 0;
 
+	task_isolation_kernel_enter();
+
 	/*
 	 * NMI context needs to be setup earlier in order to deal with tracing.
 	 */
@@ -717,6 +726,10 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
 
 	irq = irq_find_mapping(domain, hwirq);
 
+	task_isolation_interrupt((irq == hwirq) ?
+				 "NMI irq %d (%s)" : "NMI irq %d (%s hwirq %d)",
+				 irq, domain ? domain->name : "", hwirq);
+
 	/*
 	 * ack_bad_irq is not NMI-safe, just report
 	 * an invalid interrupt.
diff --git a/kernel/smp.c b/kernel/smp.c
index aa17eedff5be..6a6849783948 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -20,6 +20,7 @@
 #include <linux/sched.h>
 #include <linux/sched/idle.h>
 #include <linux/hypervisor.h>
+#include <linux/isolation.h>
 
 #include "smpboot.h"
 #include "sched/smp.h"
@@ -146,8 +147,10 @@ void __smp_call_single_queue(int cpu, struct llist_node *node)
 	 * locking and barrier primitives. Generic code isn't really
 	 * equipped to do the right thing...
 	 */
-	if (llist_add(node, &per_cpu(call_single_queue, cpu)))
+	if (llist_add(node, &per_cpu(call_single_queue, cpu))) {
+		task_isolation_remote(cpu, "IPI function");
 		send_call_function_single_ipi(cpu);
+	}
 }
 
 /*
@@ -545,6 +548,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 	}
 
 	/* Send a message to all CPUs in the map */
+	task_isolation_remote_cpumask(cfd->cpumask_ipi, "IPI function");
 	arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
 
 	if (wait) {
-- 
2.26.2
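
Not part of the patch, for context only: <linux/isolation.h> must
provide no-op stubs when CONFIG_TASK_ISOLATION is disabled so that
the hooks above compile away. A simplified, hypothetical sketch of
what those stubs could look like (the real header is presumably
introduced earlier in this series; these exact signatures are
assumptions):

	#ifndef CONFIG_TASK_ISOLATION
	/* no-op stubs: all hooks vanish when task isolation is off */
	static inline void task_isolation_kernel_enter(void) { }
	static inline void task_isolation_user_exit(void) { }
	static inline void task_isolation_interrupt(const char *fmt, ...) { }
	static inline void task_isolation_remote(int cpu,
						 const char *fmt, ...) { }
	static inline void task_isolation_remote_cpumask(
			const struct cpumask *mask, const char *fmt, ...) { }
	#endif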