On Thu, Jun 27, 2024 at 8:34 AM Zack Rusin <zack.rusin@xxxxxxxxxxxx> wrote: > > Introduce a version of the fence ops that on release doesn't remove > the fence from the pending list, and thus doesn't require a lock to > fix poll->fence wait->fence unref deadlocks. > > vmwgfx overwrites the wait callback to iterate over the list of all > fences and update their status, to do that it holds a lock to prevent > the list modifications from other threads. The fence destroy callback > both deletes the fence and removes it from the list of pending > fences, for which it holds a lock. > > dma buf polling cb unrefs a fence after it's been signaled: so the poll > calls the wait, which signals the fences, which are being destroyed. > The destruction tries to acquire the lock on the pending fences list > which it can never get because it's held by the wait from which it > was called. > > Old bug, but not a lot of userspace apps were using dma-buf polling > interfaces. Fix those, in particular this fixes KDE stalls/deadlock. 
> > Signed-off-by: Zack Rusin <zack.rusin@xxxxxxxxxxxx> > Fixes: 2298e804e96e ("drm/vmwgfx: rework to new fence interface, v2") > Cc: Broadcom internal kernel review list <bcm-kernel-feedback-list@xxxxxxxxxxxx> > Cc: dri-devel@xxxxxxxxxxxxxxxxxxxxx > Cc: <stable@xxxxxxxxxxxxxxx> # v6.2+ > --- > drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 26 ++++++++++++++++++++------ > 1 file changed, 20 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c > index 5efc6a766f64..76971ef7801a 100644 > --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c > +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c > @@ -32,7 +32,6 @@ > #define VMW_FENCE_WRAP (1 << 31) > > struct vmw_fence_manager { > - int num_fence_objects; > struct vmw_private *dev_priv; > spinlock_t lock; > struct list_head fence_list; > @@ -120,16 +119,23 @@ static void vmw_fence_goal_write(struct vmw_private *vmw, u32 value) > * objects with actions attached to them. > */ > > -static void vmw_fence_obj_destroy(struct dma_fence *f) > +static void vmw_fence_obj_destroy_removed(struct dma_fence *f) > { > struct vmw_fence_obj *fence = > container_of(f, struct vmw_fence_obj, base); > > + WARN_ON(!list_empty(&fence->head)); > + fence->destroy(fence); > +} > + > +static void vmw_fence_obj_destroy(struct dma_fence *f) > +{ > + struct vmw_fence_obj *fence = > + container_of(f, struct vmw_fence_obj, base); > struct vmw_fence_manager *fman = fman_from_fence(fence); > > spin_lock(&fman->lock); > list_del_init(&fence->head); > - --fman->num_fence_objects; > spin_unlock(&fman->lock); > fence->destroy(fence); > } > @@ -257,6 +263,13 @@ static const struct dma_fence_ops vmw_fence_ops = { > .release = vmw_fence_obj_destroy, > }; > > +static const struct dma_fence_ops vmw_fence_ops_removed = { > + .get_driver_name = vmw_fence_get_driver_name, > + .get_timeline_name = vmw_fence_get_timeline_name, > + .enable_signaling = vmw_fence_enable_signaling, > + .wait = vmw_fence_wait, > + .release = vmw_fence_obj_destroy_removed, > +};
> > /* > * Execute signal actions on fences recently signaled. > @@ -355,7 +368,6 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman, > goto out_unlock; > } > list_add_tail(&fence->head, &fman->fence_list); > - ++fman->num_fence_objects; > > out_unlock: > spin_unlock(&fman->lock); > @@ -403,7 +415,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, > u32 passed_seqno) > { > u32 goal_seqno; > - struct vmw_fence_obj *fence; > + struct vmw_fence_obj *fence, *next_fence; > > if (likely(!fman->seqno_valid)) > return false; > @@ -413,7 +425,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, > return false; > > fman->seqno_valid = false; > - list_for_each_entry(fence, &fman->fence_list, head) { > + list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) { > if (!list_empty(&fence->seq_passed_actions)) { > fman->seqno_valid = true; > vmw_fence_goal_write(fman->dev_priv, > @@ -471,6 +483,7 @@ static void __vmw_fences_update(struct vmw_fence_manager *fman) > rerun: > list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) { > if (seqno - fence->base.seqno < VMW_FENCE_WRAP) { > + fence->base.ops = &vmw_fence_ops_removed; > list_del_init(&fence->head); > dma_fence_signal_locked(&fence->base); > INIT_LIST_HEAD(&action_list); > @@ -662,6 +675,7 @@ void vmw_fence_fifo_down(struct vmw_fence_manager *fman) > VMW_FENCE_WAIT_TIMEOUT); > > if (unlikely(ret != 0)) { > + fence->base.ops = &vmw_fence_ops_removed; > list_del_init(&fence->head); > dma_fence_signal(&fence->base); > INIT_LIST_HEAD(&action_list); > -- > 2.40.1 > Neat fix! Reviewed-by: Martin Krastev <martin.krastev@xxxxxxxxxxxx> Regards, Martin