On 2/3/22 11:24 AM, Usama Arif wrote:
> -static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
> +static void io_eventfd_signal(struct io_ring_ctx *ctx)
>  {
> -	if (likely(!ctx->cq_ev_fd))
> -		return false;
> +	struct io_ev_fd *ev_fd;
> +
> +	rcu_read_lock();
> +	/* rcu_dereference ctx->io_ev_fd once and use it for both for checking and eventfd_signal */
> +	ev_fd = rcu_dereference(ctx->io_ev_fd);
> +
> +	if (likely(!ev_fd))
> +		goto out;
>  	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
> -		return false;
> -	return !ctx->eventfd_async || io_wq_current_is_worker();
> +		goto out;
> +
> +	if (!ctx->eventfd_async || io_wq_current_is_worker())
> +		eventfd_signal(ev_fd->cq_ev_fd, 1);
> +
> +out:
> +	rcu_read_unlock();
>  }

This still needs what we discussed in v3, something ala:

	/*
	 * This will potentially race with eventfd registration, but that's
	 * always going to be the case if there is IO inflight while an eventfd
	 * descriptor is being registered.
	 */
	if (!rcu_dereference_raw(ctx->io_ev_fd))
		return;

	rcu_read_lock();
	...

which I think is cheap enough and won't hit sparse complaints. The

> @@ -9353,35 +9370,70 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
> 
>  static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg)
>  {
> +	struct io_ev_fd *ev_fd;
>  	__s32 __user *fds = arg;
> -	int fd;
> +	int fd, ret;
> 
> -	if (ctx->cq_ev_fd)
> -		return -EBUSY;
> +	mutex_lock(&ctx->ev_fd_lock);
> +	ret = -EBUSY;
> +	if (rcu_dereference_protected(ctx->io_ev_fd, lockdep_is_held(&ctx->ev_fd_lock))) {
> +		rcu_barrier();
> +		if(rcu_dereference_protected(ctx->io_ev_fd, lockdep_is_held(&ctx->ev_fd_lock)))
> +			goto out;
> +	}

I wonder if we can get away with assigning ctx->io_ev_fd to NULL when we
do the call_rcu(). The struct itself will remain valid as long as we're
under rcu_read_lock() protection, so I think we'd be fine? If we do
that, then we don't need any rcu_barrier() or synchronize_rcu() calls,
as we can register a new one while the previous one is still being
killed. Hmm?

>  static int io_eventfd_unregister(struct io_ring_ctx *ctx)
>  {
> -	if (ctx->cq_ev_fd) {
> -		eventfd_ctx_put(ctx->cq_ev_fd);
> -		ctx->cq_ev_fd = NULL;
> -		return 0;
> +	struct io_ev_fd *ev_fd;
> +	int ret;
> +
> +	mutex_lock(&ctx->ev_fd_lock);
> +	ev_fd = rcu_dereference_protected(ctx->io_ev_fd, lockdep_is_held(&ctx->ev_fd_lock));
> +	if (ev_fd) {
> +		call_rcu(&ev_fd->rcu, io_eventfd_put);
> +		ret = 0;
> +		goto out;
>  	}
> +	ret = -ENXIO;
> 
> -	return -ENXIO;
> +out:
> +	mutex_unlock(&ctx->ev_fd_lock);
> +	return ret;
>  }

I also think that'd be cleaner without the goto:

{
	struct io_ev_fd *ev_fd;

	mutex_lock(&ctx->ev_fd_lock);
	ev_fd = rcu_dereference_protected(ctx->io_ev_fd, lockdep_is_held(&ctx->ev_fd_lock));
	if (ev_fd) {
		call_rcu(&ev_fd->rcu, io_eventfd_put);
		mutex_unlock(&ctx->ev_fd_lock);
		return 0;
	}

	mutex_unlock(&ctx->ev_fd_lock);
	return -ENXIO;
}

-- 
Jens Axboe
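
For illustration, here is a minimal sketch of the unregister path under
the idea floated above: clear ctx->io_ev_fd at the moment the call_rcu()
is queued, so re-registration never needs rcu_barrier() or
synchronize_rcu(). It reuses the names from the quoted patch (struct
io_ev_fd, io_eventfd_put, ctx->ev_fd_lock); the details are an assumed
sketch, not the final upstream code:

	static int io_eventfd_unregister(struct io_ring_ctx *ctx)
	{
		struct io_ev_fd *ev_fd;

		mutex_lock(&ctx->ev_fd_lock);
		ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
						  lockdep_is_held(&ctx->ev_fd_lock));
		if (!ev_fd) {
			mutex_unlock(&ctx->ev_fd_lock);
			return -ENXIO;
		}

		/*
		 * Clear the pointer before queueing the free. Readers that
		 * already did rcu_dereference() keep a valid ev_fd until
		 * io_eventfd_put() runs after the grace period; new readers
		 * and a subsequent registration see NULL right away.
		 */
		RCU_INIT_POINTER(ctx->io_ev_fd, NULL);
		call_rcu(&ev_fd->rcu, io_eventfd_put);
		mutex_unlock(&ctx->ev_fd_lock);
		return 0;
	}

With that in place, io_eventfd_register() would presumably only need to
check rcu_dereference_protected() for non-NULL under ev_fd_lock and
return -EBUSY, with no barrier before installing the new ev_fd.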