Hi, OT: Btw for some reason you're sending a copy to "stable-rt@xxxxxxxxxxxxxxx"@redhat.com. On Fri, Jan 7, 2022 at 8:04 PM Luis Claudio R. Goncalves <lgoncalv@xxxxxxxxxx> wrote: > > From: Thomas Gleixner <tglx@xxxxxxxxxxxxx> > > v5.10.90-rt61-rc1 stable review patch. > If anyone has any objections, please let me know. > > ----------- > > > Upstream commit b542e383d8c005f06a131e2b40d5889b812f19c6 > > The recursion protection for eventfd_signal() is based on a per CPU > variable and relies on the !RT semantics of spin_lock_irqsave() for > protecting this per CPU variable. On RT kernels spin_lock_irqsave() neither > disables preemption nor interrupts which allows the spin lock held section > to be preempted. If the preempting task invokes eventfd_signal() as well, > then the recursion warning triggers. > > Paolo suggested to protect the per CPU variable with a local lock, but > that's heavyweight and actually not necessary. The goal of this protection > is to prevent the task stack from overflowing, which can be achieved with a > per task recursion protection as well. > > Replace the per CPU variable with a per task bit similar to other recursion > protection bits like task_struct::in_page_owner. This works on both !RT and > RT kernels and removes as a side effect the extra per CPU storage. > > No functional change for !RT kernels. > > Reported-by: Daniel Bristot de Oliveira <bristot@xxxxxxxxxx> > Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> > Tested-by: Daniel Bristot de Oliveira <bristot@xxxxxxxxxx> > Acked-by: Jason Wang <jasowang@xxxxxxxxxx> > Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx> > Link: https://lore.kernel.org/r/87wnp9idso.ffs@tglx > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> > --- > fs/aio.c | 2 +- > fs/eventfd.c | 12 +++++------- > include/linux/eventfd.h | 11 +++++------ > include/linux/sched.h | 4 ++++ > 4 files changed, 15 insertions(+), 14 deletions(-) > > diff --git a/fs/aio.c b/fs/aio.c > index c72b2c51b446c..f7d47c9ff6deb 100644 > --- a/fs/aio.c > +++ b/fs/aio.c > @@ -1761,7 +1761,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, > list_del_init(&req->wait.entry); > list_del(&iocb->ki_list); > iocb->ki_res.res = mangle_poll(mask); > - if (iocb->ki_eventfd && eventfd_signal_count()) { > + if (iocb->ki_eventfd && eventfd_signal_allowed()) { IIUC, the logic changed and this should read !eventfd_signal_allowed(). Or am I missing something here? Actually checking other stable branches and upstream, this looks to be a typo only in v5.10.90-rt61-rc1 --nX > iocb = NULL; > INIT_WORK(&req->work, aio_poll_put_work); > schedule_work(&req->work); > diff --git a/fs/eventfd.c b/fs/eventfd.c > index df466ef81dddf..9035ca60bfcf3 100644 > --- a/fs/eventfd.c > +++ b/fs/eventfd.c > @@ -25,8 +25,6 @@ > #include <linux/idr.h> > #include <linux/uio.h> > > -DEFINE_PER_CPU(int, eventfd_wake_count); > - > static DEFINE_IDA(eventfd_ida); > > struct eventfd_ctx { > @@ -67,21 +65,21 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) > * Deadlock or stack overflow issues can happen if we recurse here > * through waitqueue wakeup handlers. If the caller users potentially > * nested waitqueues with custom wakeup handlers, then it should > - * check eventfd_signal_count() before calling this function. If > - * it returns true, the eventfd_signal() call should be deferred to a > + * check eventfd_signal_allowed() before calling this function. If > + * it returns false, the eventfd_signal() call should be deferred to a > * safe context. > */ > - if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count))) > + if (WARN_ON_ONCE(current->in_eventfd_signal)) > return 0; > > spin_lock_irqsave(&ctx->wqh.lock, flags); > - this_cpu_inc(eventfd_wake_count); > + current->in_eventfd_signal = 1; > if (ULLONG_MAX - ctx->count < n) > n = ULLONG_MAX - ctx->count; > ctx->count += n; > if (waitqueue_active(&ctx->wqh)) > wake_up_locked_poll(&ctx->wqh, EPOLLIN); > - this_cpu_dec(eventfd_wake_count); > + current->in_eventfd_signal = 0; > spin_unlock_irqrestore(&ctx->wqh.lock, flags); > > return n; > diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h > index dc4fd8a6644dd..836b4c021a0a4 100644 > --- a/include/linux/eventfd.h > +++ b/include/linux/eventfd.h > @@ -14,6 +14,7 @@ > #include <linux/err.h> > #include <linux/percpu-defs.h> > #include <linux/percpu.h> > +#include <linux/sched.h> > > /* > * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining > @@ -42,11 +43,9 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); > int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, > __u64 *cnt); > > -DECLARE_PER_CPU(int, eventfd_wake_count); > - > -static inline bool eventfd_signal_count(void) > +static inline bool eventfd_signal_allowed(void) > { > - return this_cpu_read(eventfd_wake_count); > + return !current->in_eventfd_signal; > } > > #else /* CONFIG_EVENTFD */ > @@ -77,9 +76,9 @@ static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, > return -ENOSYS; > } > > -static inline bool eventfd_signal_count(void) > +static inline bool eventfd_signal_allowed(void) > { > - return false; > + return true; > } > > #endif > diff --git a/include/linux/sched.h b/include/linux/sched.h > index 409a24036952c..29e6ff1af1df9 100644 > --- a/include/linux/sched.h > +++ b/include/linux/sched.h > @@ -852,6 +852,10 @@ struct task_struct { > /* Stalled due to lack of memory */ > unsigned in_memstall:1; > #endif > +#ifdef CONFIG_EVENTFD > + /* Recursion prevention for eventfd_signal() */ > + unsigned in_eventfd_signal:1; > +#endif > > unsigned long atomic_flags; /* Flags requiring atomic access. */ > > -- > 2.33.1 >