On Thu, Jun 6, 2024 at 2:19 AM Christian König <christian.koenig@xxxxxxx> wrote: > > Am 05.06.24 um 15:20 schrieb Alex Deucher: > > On Wed, Jun 5, 2024 at 8:32 AM Christian König > <ckoenig.leichtzumerken@xxxxxxxxx> wrote: > > This reverts commit b8c415e3bf989be1b749409951debe6b36f5c78c and > commit 425285d39afddaf4a9dab36045b816af0cc3e400. > > Taking a runtime pm reference for DMA-buf is actually completely > unnecessary. > > When the buffer is in GTT it is still accessible even when the GPU > is powered down and when it is in VRAM the buffer gets migrated to > GTT before powering down. > > Won't that kind of defeat the purpose of P2P DMA? I guess it's a > trade off between performance and power savings. > > > Not really. P2P is useful because ti avoids the extra bounce through system memory. > > But when the ASIC is powered down and not producing any new data there really is no extra bounce. > > The only use case which would make it mandatory to keep the runtime > pm reference would be if we pin the buffer into VRAM, and that's not > something we currently do. > > We'll need to bring this back if we ever support that? I think we'll > want that for P2P DMA with RDMA NICs that don't support ODP. That's > one of the big blockers for a lot of ROCm customers to migrate to the > in box drivers. > > > Yeah, but we need a completely different approach in that case. > > The problem is that calling pm_runtime_get_sync() from the DMA-buf callbacks is illegal in the first place because we have the reservation lock taken here which is also taken during resume. > > So this here never triggered or otherwise we would have seen a deadlock (I should probably mention that in the commit message). Thanks. With that added to the commit message, the patch is: Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx> > > Christian. > > > Alex > > Signed-off-by: Christian König <christian.koenig@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 33 --------------------- > drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 -- > drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 15 ---------- > 3 files changed, 50 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c > index 0b3b10d21952..ab848047204c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c > @@ -41,8 +41,6 @@ > #include <linux/dma-buf.h> > #include <linux/dma-fence-array.h> > #include <linux/pci-p2pdma.h> > -#include <linux/pm_runtime.h> > -#include "amdgpu_trace.h" > > /** > * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation > @@ -63,37 +61,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, > if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) > attach->peer2peer = false; > > - r = pm_runtime_get_sync(adev_to_drm(adev)->dev); > - trace_amdgpu_runpm_reference_dumps(1, __func__); > - if (r < 0) > - goto out; > - > return 0; > - > -out: > - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > - trace_amdgpu_runpm_reference_dumps(0, __func__); > - return r; > -} > - > -/** > - * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation > - * > - * @dmabuf: DMA-buf where we remove the attachment from > - * @attach: the attachment to remove > - * > - * Called when an attachment is removed from the DMA-buf. > - */ > -static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf, > - struct dma_buf_attachment *attach) > -{ > - struct drm_gem_object *obj = dmabuf->priv; > - struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); > - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > - > - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); > - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > - trace_amdgpu_runpm_reference_dumps(0, __func__); > } > > /** > @@ -266,7 +234,6 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, > > const struct dma_buf_ops amdgpu_dmabuf_ops = { > .attach = amdgpu_dma_buf_attach, > - .detach = amdgpu_dma_buf_detach, > .pin = amdgpu_dma_buf_pin, > .unpin = amdgpu_dma_buf_unpin, > .map_dma_buf = amdgpu_dma_buf_map, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > index 10832b470448..bc3ac73b6b8d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > @@ -181,7 +181,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd > amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, > seq, flags | AMDGPU_FENCE_FLAG_INT); > pm_runtime_get_noresume(adev_to_drm(adev)->dev); > - trace_amdgpu_runpm_reference_dumps(1, __func__); > ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; > if (unlikely(rcu_dereference_protected(*ptr, 1))) { > struct dma_fence *old; > @@ -309,7 +308,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) > dma_fence_put(fence); > pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); > pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > - trace_amdgpu_runpm_reference_dumps(0, __func__); > } while (last_seq != seq); > > return true; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h > index f539b1d00234..2fd1bfb35916 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h > @@ -554,21 +554,6 @@ TRACE_EVENT(amdgpu_reset_reg_dumps, > __entry->value) > ); > > -TRACE_EVENT(amdgpu_runpm_reference_dumps, > - TP_PROTO(uint32_t index, const char *func), > - TP_ARGS(index, func), > - TP_STRUCT__entry( > - __field(uint32_t, index) > - __string(func, func) > - ), > - TP_fast_assign( > - __entry->index = index; > - __assign_str(func, func); > - ), > - TP_printk("amdgpu runpm reference dump 0x%x: 0x%s\n", > - __entry->index, > - __get_str(func)) > -); > #undef AMDGPU_JOB_GET_TIMELINE_NAME > #endif > > -- > 2.34.1 > >