Actually I see it was fixed later with
4b3749865374899e115aa8c48681709b086fe6d3 so I guess it should be
included as well.

--nX

On Thu, Feb 3, 2022 at 9:59 AM Daniel Vacek <neelx.g@xxxxxxxxx> wrote:
>
> Hi,
>
> OT: Btw for some reason you're sending a copy to
> "stable-rt@xxxxxxxxxxxxxxx"@redhat.com.
>
> On Fri, Jan 7, 2022 at 8:04 PM Luis Claudio R. Goncalves
> <lgoncalv@xxxxxxxxxx> wrote:
> >
> > From: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> >
> > v5.10.90-rt61-rc1 stable review patch.
> > If anyone has any objections, please let me know.
> >
> > -----------
> >
> >
> > Upstream commit b542e383d8c005f06a131e2b40d5889b812f19c6
> >
> > The recursion protection for eventfd_signal() is based on a per CPU
> > variable and relies on the !RT semantics of spin_lock_irqsave() for
> > protecting this per CPU variable. On RT kernels spin_lock_irqsave() neither
> > disables preemption nor interrupts which allows the spin lock held section
> > to be preempted. If the preempting task invokes eventfd_signal() as well,
> > then the recursion warning triggers.
> >
> > Paolo suggested to protect the per CPU variable with a local lock, but
> > that's heavyweight and actually not necessary. The goal of this protection
> > is to prevent the task stack from overflowing, which can be achieved with a
> > per task recursion protection as well.
> >
> > Replace the per CPU variable with a per task bit similar to other recursion
> > protection bits like task_struct::in_page_owner. This works on both !RT and
> > RT kernels and removes as a side effect the extra per CPU storage.
> >
> > No functional change for !RT kernels.
> >
> > Reported-by: Daniel Bristot de Oliveira <bristot@xxxxxxxxxx>
> > Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> > Tested-by: Daniel Bristot de Oliveira <bristot@xxxxxxxxxx>
> > Acked-by: Jason Wang <jasowang@xxxxxxxxxx>
> > Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
> > Link: https://lore.kernel.org/r/87wnp9idso.ffs@tglx
> > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
> > ---
> >  fs/aio.c                |  2 +-
> >  fs/eventfd.c            | 12 +++++-------
> >  include/linux/eventfd.h | 11 +++++------
> >  include/linux/sched.h   |  4 ++++
> >  4 files changed, 15 insertions(+), 14 deletions(-)
> >
> > diff --git a/fs/aio.c b/fs/aio.c
> > index c72b2c51b446c..f7d47c9ff6deb 100644
> > --- a/fs/aio.c
> > +++ b/fs/aio.c
> > @@ -1761,7 +1761,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
> >                 list_del_init(&req->wait.entry);
> >                 list_del(&iocb->ki_list);
> >                 iocb->ki_res.res = mangle_poll(mask);
> > -               if (iocb->ki_eventfd && eventfd_signal_count()) {
> > +               if (iocb->ki_eventfd && eventfd_signal_allowed()) {
>
> IIUC, the logic changed and this should read
> !eventfd_signal_allowed(). Or am I missing something here?
>
> Actually checking other stable branches and upstream, this looks to be
> a typo only in v5.10.90-rt61-rc1
>
> --nX
>
>
> >                         iocb = NULL;
> >                         INIT_WORK(&req->work, aio_poll_put_work);
> >                         schedule_work(&req->work);
> > diff --git a/fs/eventfd.c b/fs/eventfd.c
> > index df466ef81dddf..9035ca60bfcf3 100644
> > --- a/fs/eventfd.c
> > +++ b/fs/eventfd.c
> > @@ -25,8 +25,6 @@
> >  #include <linux/idr.h>
> >  #include <linux/uio.h>
> >
> > -DEFINE_PER_CPU(int, eventfd_wake_count);
> > -
> >  static DEFINE_IDA(eventfd_ida);
> >
> >  struct eventfd_ctx {
> > @@ -67,21 +65,21 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
> >          * Deadlock or stack overflow issues can happen if we recurse here
> >          * through waitqueue wakeup handlers. If the caller users potentially
> >          * nested waitqueues with custom wakeup handlers, then it should
> > -        * check eventfd_signal_count() before calling this function. If
> > -        * it returns true, the eventfd_signal() call should be deferred to a
> > +        * check eventfd_signal_allowed() before calling this function. If
> > +        * it returns false, the eventfd_signal() call should be deferred to a
> >          * safe context.
> >          */
> > -       if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
> > +       if (WARN_ON_ONCE(current->in_eventfd_signal))
> >                 return 0;
> >
> >         spin_lock_irqsave(&ctx->wqh.lock, flags);
> > -       this_cpu_inc(eventfd_wake_count);
> > +       current->in_eventfd_signal = 1;
> >         if (ULLONG_MAX - ctx->count < n)
> >                 n = ULLONG_MAX - ctx->count;
> >         ctx->count += n;
> >         if (waitqueue_active(&ctx->wqh))
> >                 wake_up_locked_poll(&ctx->wqh, EPOLLIN);
> > -       this_cpu_dec(eventfd_wake_count);
> > +       current->in_eventfd_signal = 0;
> >         spin_unlock_irqrestore(&ctx->wqh.lock, flags);
> >
> >         return n;
> > diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
> > index dc4fd8a6644dd..836b4c021a0a4 100644
> > --- a/include/linux/eventfd.h
> > +++ b/include/linux/eventfd.h
> > @@ -14,6 +14,7 @@
> >  #include <linux/err.h>
> >  #include <linux/percpu-defs.h>
> >  #include <linux/percpu.h>
> > +#include <linux/sched.h>
> >
> >  /*
> >   * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
> > @@ -42,11 +43,9 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
> >  int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
> >                                   __u64 *cnt);
> >
> > -DECLARE_PER_CPU(int, eventfd_wake_count);
> > -
> > -static inline bool eventfd_signal_count(void)
> > +static inline bool eventfd_signal_allowed(void)
> >  {
> > -       return this_cpu_read(eventfd_wake_count);
> > +       return !current->in_eventfd_signal;
> >  }
> >
> >  #else /* CONFIG_EVENTFD */
> > @@ -77,9 +76,9 @@ static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx,
> >                 return -ENOSYS;
> >  }
> >
> > -static inline bool eventfd_signal_count(void)
> > +static inline bool eventfd_signal_allowed(void)
> >  {
> > -       return false;
> > +       return true;
> >  }
> >
> >  #endif
> > diff --git a/include/linux/sched.h b/include/linux/sched.h
> > index 409a24036952c..29e6ff1af1df9 100644
> > --- a/include/linux/sched.h
> > +++ b/include/linux/sched.h
> > @@ -852,6 +852,10 @@ struct task_struct {
> >         /* Stalled due to lack of memory */
> >         unsigned                        in_memstall:1;
> >  #endif
> > +#ifdef CONFIG_EVENTFD
> > +       /* Recursion prevention for eventfd_signal() */
> > +       unsigned                        in_eventfd_signal:1;
> > +#endif
> >
> >         unsigned long                   atomic_flags; /* Flags requiring atomic access. */
> >
> > --
> > 2.33.1
> >
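
To make the deferral rule from the changelog concrete, here is roughly what
a caller of eventfd_signal() from a potentially nested wakeup path is
expected to do after this change. This is just an illustrative sketch from
my side, not code from the patch: my_ctx, my_ctx_init(), my_signal_work()
and my_wakeup_path() are made-up names.

#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>

struct my_ctx {
        struct eventfd_ctx *evfd;       /* eventfd to notify */
        struct work_struct work;        /* deferred notification */
};

/* Workqueue context: current->in_eventfd_signal is never set here. */
static void my_signal_work(struct work_struct *work)
{
        struct my_ctx *mc = container_of(work, struct my_ctx, work);

        eventfd_signal(mc->evfd, 1);
}

static void my_ctx_init(struct my_ctx *mc, struct eventfd_ctx *evfd)
{
        mc->evfd = evfd;
        INIT_WORK(&mc->work, my_signal_work);
}

/* Possibly called from a nested waitqueue wakeup handler. */
static void my_wakeup_path(struct my_ctx *mc)
{
        if (eventfd_signal_allowed())
                eventfd_signal(mc->evfd, 1);    /* no recursion, signal now */
        else
                schedule_work(&mc->work);       /* defer to a safe context */
}

That is also why the missing '!' in the aio.c hunk matters: if I read the
surrounding aio code right, the inverted check makes aio defer exactly when
signalling inline would have been safe, and signal inline from the one
context where eventfd_signal() hits the WARN_ON_ONCE() and drops the event.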