On Wed, Jun 30, 2021 at 2:36 PM Christian König
<ckoenig.leichtzumerken@xxxxxxxxx> wrote:
>
> Daniel pointed me towards this function and there are multiple obvious
> problems in the implementation.
>
> First of all the retry loop is not working as intended. In general the
> retry only makes sense if you grab the reference first and then check
> the sequence values.
>
> Then we should always also wait for the exclusive fence.
>
> It's also good practice to keep the reference around when installing
> callbacks to fences you don't own.
>
> And last the whole implementation was unnecessarily complex and rather
> hard to understand, which could lead to unexpected behavior of the
> IOCTL.
>
> Fix all this by reworking the implementation from scratch, dropping
> the whole RCU approach and taking the lock instead.
>
> Only mildly tested and needs a thoughtful review of the code.

prime_vgem.c has some basic stuff, but it might actually be encoding
the broken behaviour. Would be good to extend/fix that, I think, so we
don't entirely rely on review.

We can't really build easily on top of the testcase Jason created for
import/export, since for implicit sync we need some driver that
attaches the fences for us. There's also a vc4 one, but I guess that's
less useful for us :-)
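On the retry point above, for anyone following along: the ordering
Christian means is grab the fence reference first, only then validate
the sequence count. Roughly like the below - just an illustrative
sketch (get_excl_fence is a made-up helper, not code from the patch):

static struct dma_fence *get_excl_fence(struct dma_resv *resv)
{
        struct dma_fence *fence;
        unsigned int seq;

retry:
        rcu_read_lock();
        seq = read_seqcount_begin(&resv->seq);
        fence = rcu_dereference(resv->fence_excl);
        /* Take the reference first ... */
        if (fence && !dma_fence_get_rcu(fence)) {
                rcu_read_unlock();
                goto retry;
        }
        rcu_read_unlock();
        /* ... and only then check the seqcount, so a concurrent
         * update can't hand us back a stale pointer. */
        if (fence && read_seqcount_retry(&resv->seq, seq)) {
                dma_fence_put(fence);
                goto retry;
        }
        return fence;
}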
> v2: fix the reference counting as well
> v3: keep the excl fence handling as is for stable
> v4: back to testing all fences, drop RCU
>
> Signed-off-by: Christian König <christian.koenig@xxxxxxx>
> CC: stable@xxxxxxxxxxxxxxx
> ---
>  drivers/dma-buf/dma-buf.c | 132 +++++++++++++-------------------------
>  include/linux/dma-buf.h   |   2 +-
>  2 files changed, 46 insertions(+), 88 deletions(-)
>
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index eadd1eaa2fb5..192c4d34704b 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -72,7 +72,7 @@ static void dma_buf_release(struct dentry *dentry)
>          * If you hit this BUG() it means someone dropped their ref to the
>          * dma-buf while still having pending operation to the buffer.
>          */
> -       BUG_ON(dmabuf->cb_shared.active || dmabuf->cb_excl.active);
> +       BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active);
>
>         dmabuf->ops->release(dmabuf);
>
> @@ -202,16 +202,19 @@ static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
>         wake_up_locked_poll(dcb->poll, dcb->active);
>         dcb->active = 0;
>         spin_unlock_irqrestore(&dcb->poll->lock, flags);
> +       dma_fence_put(fence);
>  }
>
>  static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
>  {
> +       struct dma_buf_poll_cb_t *dcb;
>         struct dma_buf *dmabuf;
>         struct dma_resv *resv;
>         struct dma_resv_list *fobj;
> -       struct dma_fence *fence_excl;
> +       struct dma_fence *fence;
> +       unsigned shared_count;
>         __poll_t events;
> -       unsigned shared_count, seq;
> +       int r, i;
>
>         dmabuf = file->private_data;
>         if (!dmabuf || !dmabuf->resv)
> @@ -225,101 +228,56 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
>         if (!events)
>                 return 0;
>
> -retry:
> -       seq = read_seqcount_begin(&resv->seq);
> -       rcu_read_lock();
> +       dcb = events & EPOLLOUT ? &dmabuf->cb_out : &dmabuf->cb_in;
> +
> +       /* Only queue a new one if we are not still waiting for the old one */
> +       spin_lock_irq(&dmabuf->poll.lock);
> +       if (dcb->active)
> +               events = 0;
> +       else
> +               dcb->active = events;
> +       spin_unlock_irq(&dmabuf->poll.lock);
> +       if (!events)
> +               return 0;
> +
> +       dma_resv_lock(resv, NULL);
>
> -       fobj = rcu_dereference(resv->fence);
> -       if (fobj)
> +       fobj = dma_resv_get_list(resv);
> +       if (fobj && events & EPOLLOUT)
>                 shared_count = fobj->shared_count;
>         else
>                 shared_count = 0;
> -       fence_excl = rcu_dereference(resv->fence_excl);
> -       if (read_seqcount_retry(&resv->seq, seq)) {
> -               rcu_read_unlock();
> -               goto retry;
> -       }
>
> -       if (fence_excl && (!(events & EPOLLOUT) || shared_count == 0)) {
> -               struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl;
> -               __poll_t pevents = EPOLLIN;
> -
> -               if (shared_count == 0)
> -                       pevents |= EPOLLOUT;
> -
> -               spin_lock_irq(&dmabuf->poll.lock);
> -               if (dcb->active) {
> -                       dcb->active |= pevents;
> -                       events &= ~pevents;
> -               } else
> -                       dcb->active = pevents;
> -               spin_unlock_irq(&dmabuf->poll.lock);
> -
> -               if (events & pevents) {
> -                       if (!dma_fence_get_rcu(fence_excl)) {
> -                               /* force a recheck */
> -                               events &= ~pevents;
> -                               dma_buf_poll_cb(NULL, &dcb->cb);
> -                       } else if (!dma_fence_add_callback(fence_excl, &dcb->cb,
> -                                                          dma_buf_poll_cb)) {
> -                               events &= ~pevents;
> -                               dma_fence_put(fence_excl);
> -                       } else {
> -                               /*
> -                                * No callback queued, wake up any additional
> -                                * waiters.
> -                                */
> -                               dma_fence_put(fence_excl);
> -                               dma_buf_poll_cb(NULL, &dcb->cb);
> -                       }
> +       for (i = 0; i < shared_count; ++i) {
> +               fence = rcu_dereference_protected(fobj->shared[i],
> +                                                 dma_resv_held(resv));
> +               dma_fence_get(fence);
> +               r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
> +               if (!r) {
> +                       /* Callback queued */
> +                       events = 0;

Why do you clear events here? There's more than EPOLLIN/OUT, and I
think you can also set both (and then we should be able to queue
both). I think a few more testcases for this would be really good.

I think the old code all handled that like I'd expect it to.
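To illustrate the both-directions case from the userspace side (a
rough sketch only; dmabuf_fd stands in for an fd some exporter handed
out, and wait_for_dmabuf_idle is a made-up name):

#include <poll.h>

static int wait_for_dmabuf_idle(int dmabuf_fd)
{
        struct pollfd pfd = {
                .fd = dmabuf_fd,
                /* ask for read *and* write readiness in one call */
                .events = POLLIN | POLLOUT,
        };

        /* The two directions can become ready independently, so the
         * kernel side should be able to queue callbacks for both. */
        return poll(&pfd, 1, -1);
}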
Cheers, Daniel

> +                       goto out;
>                 }
> +               dma_fence_put(fence);
>         }
>
> -       if ((events & EPOLLOUT) && shared_count > 0) {
> -               struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_shared;
> -               int i;
> -
> -               /* Only queue a new callback if no event has fired yet */
> -               spin_lock_irq(&dmabuf->poll.lock);
> -               if (dcb->active)
> -                       events &= ~EPOLLOUT;
> -               else
> -                       dcb->active = EPOLLOUT;
> -               spin_unlock_irq(&dmabuf->poll.lock);
> -
> -               if (!(events & EPOLLOUT))
> +       fence = dma_resv_get_excl(resv);
> +       if (fence) {
> +               dma_fence_get(fence);
> +               r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
> +               if (!r) {
> +                       /* Callback queued */
> +                       events = 0;
>                         goto out;
> -
> -               for (i = 0; i < shared_count; ++i) {
> -                       struct dma_fence *fence = rcu_dereference(fobj->shared[i]);
> -
> -                       if (!dma_fence_get_rcu(fence)) {
> -                               /*
> -                                * fence refcount dropped to zero, this means
> -                                * that fobj has been freed
> -                                *
> -                                * call dma_buf_poll_cb and force a recheck!
> -                                */
> -                               events &= ~EPOLLOUT;
> -                               dma_buf_poll_cb(NULL, &dcb->cb);
> -                               break;
> -                       }
> -                       if (!dma_fence_add_callback(fence, &dcb->cb,
> -                                                   dma_buf_poll_cb)) {
> -                               dma_fence_put(fence);
> -                               events &= ~EPOLLOUT;
> -                               break;
> -                       }
> -                       dma_fence_put(fence);
> -               }
> -
> -               /* No callback queued, wake up any additional waiters. */
> -               if (i == shared_count)
> -                       dma_buf_poll_cb(NULL, &dcb->cb);
> +               dma_fence_put(fence);
>         }
>
> +       /* No callback queued, wake up any additional waiters. */
> +       dma_buf_poll_cb(NULL, &dcb->cb);
> +
>  out:
> -       rcu_read_unlock();
> +       dma_resv_unlock(resv);
>         return events;
>  }
>
> @@ -562,8 +520,8 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
>         dmabuf->owner = exp_info->owner;
>         spin_lock_init(&dmabuf->name_lock);
>         init_waitqueue_head(&dmabuf->poll);
> -       dmabuf->cb_excl.poll = dmabuf->cb_shared.poll = &dmabuf->poll;
> -       dmabuf->cb_excl.active = dmabuf->cb_shared.active = 0;
> +       dmabuf->cb_in.poll = dmabuf->cb_out.poll = &dmabuf->poll;
> +       dmabuf->cb_in.active = dmabuf->cb_out.active = 0;
>
>         if (!resv) {
>                 resv = (struct dma_resv *)&dmabuf[1];
> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> index efdc56b9d95f..7e747ad54c81 100644
> --- a/include/linux/dma-buf.h
> +++ b/include/linux/dma-buf.h
> @@ -329,7 +329,7 @@ struct dma_buf {
>         wait_queue_head_t *poll;
>
>         __poll_t active;
> -       } cb_excl, cb_shared;
> +       } cb_in, cb_out;
>  };
>
>  /**
> --
> 2.25.1
>

--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch