This is an optimization. In task_work_add(), we have the following loop:

	do {
		head = READ_ONCE(task->task_works);
		if (unlikely(head == &work_exited))
			return -ESRCH;
		work->next = head;
	} while (cmpxchg(&task->task_works, head, work) != head);

If CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG is set, we are safe to call
task_work_add() even in NMI context. In such cases, the irq_work can be
avoided, skipping the intermediate step of setting up the task_work
through an irq_work.

Suggested-by: Jens Axboe <axboe@xxxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxxx>
Signed-off-by: Yonghong Song <yhs@xxxxxx>
---
 kernel/trace/bpf_trace.c | 52 +++++++++++++++++++++++++++-------------
 1 file changed, 35 insertions(+), 17 deletions(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index db7b6194e38a..b7bb11c0e5b0 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -713,21 +713,26 @@ static void do_send_signal_work(struct callback_head *twork)
 	kfree(twcb);
 }
 
-static void add_send_signal_task_work(u32 sig, enum pid_type type)
+static int add_send_signal_task_work(u32 sig, enum pid_type type)
 {
 	struct send_signal_work_cb *twcb;
+	int ret;
 
 	twcb = kzalloc(sizeof(*twcb), GFP_ATOMIC);
 	if (!twcb)
-		return;
+		return -ENOMEM;
 
 	twcb->sig = sig;
 	twcb->type = type;
 	init_task_work(&twcb->twork, do_send_signal_work);
-	if (task_work_add(current, &twcb->twork, true))
+	ret = task_work_add(current, &twcb->twork, true);
+	if (ret)
 		kfree(twcb);
+
+	return ret;
 }
 
+#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
 struct send_signal_irq_work {
 	struct irq_work irq_work;
 	struct task_struct *task;
@@ -748,10 +753,29 @@ static void do_bpf_send_signal(struct irq_work *entry)
 	put_task_struct(work->task);
 }
 
-static int bpf_send_signal_common(u32 sig, enum pid_type type)
+static int add_send_signal_irq_work(u32 sig, enum pid_type type)
 {
 	struct send_signal_irq_work *work = NULL;
 
+	work = this_cpu_ptr(&send_signal_work);
+	if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY)
+		return -EBUSY;
+
+	/* Add the current task, which is the target of sending signal,
+	 * to the irq_work. The current task may change when queued
+	 * irq works get executed.
+	 */
+	work->task = get_task_struct(current);
+	work->sig = sig;
+	work->type = type;
+	irq_work_queue(&work->irq_work);
+
+	return 0;
+}
+#endif
+
+static int bpf_send_signal_common(u32 sig, enum pid_type type)
+{
 	/* Similar to bpf_probe_write_user, task needs to be
 	 * in a sound condition and kernel memory access be
 	 * permitted in order to send signal to the current
@@ -771,19 +795,11 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type)
 		if (unlikely(!valid_signal(sig)))
 			return -EINVAL;
 
-		work = this_cpu_ptr(&send_signal_work);
-		if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY)
-			return -EBUSY;
-
-		/* Add the current task, which is the target of sending signal,
-		 * to the irq_work. The current task may change when queued
-		 * irq works get executed.
-		 */
-		work->task = get_task_struct(current);
-		work->sig = sig;
-		work->type = type;
-		irq_work_queue(&work->irq_work);
-		return 0;
+#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+		return add_send_signal_irq_work(sig, type);
+#else
+		return add_send_signal_task_work(sig, type);
+#endif
 	}
 
 	return group_send_sig_info(sig, SEND_SIG_PRIV, current, type);
@@ -1673,6 +1689,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
 	return err;
 }
 
+#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
 static int __init send_signal_irq_work_init(void)
 {
 	int cpu;
@@ -1686,6 +1703,7 @@ static int __init send_signal_irq_work_init(void)
 	return 0;
 }
 subsys_initcall(send_signal_irq_work_init);
+#endif
 
 #ifdef CONFIG_MODULES
 static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
-- 
2.17.1