Actually it needs to be pinned here, since otherwise page flip will pin it into vram. SAm > -----Original Message----- > From: Christian König [mailto:ckoenig.leichtzumerken at gmail.com] > Sent: Wednesday, November 29, 2017 4:39 AM > To: Li, Samuel <Samuel.Li at amd.com>; amd-gfx at lists.freedesktop.org > Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma > buf. > > Am 29.11.2017 um 01:20 schrieb Samuel Li: > > To improve cpu read performance. This is implemented for APUs currently. > > And once more a NAK for this approach. > > What we could do is migrate the BO to GTT during mmap, but pinning it is out > of question. > > Regards, > Christian. > > > > > Change-Id: I300a7daed8f2b0ba6be71a43196a6b8617ddc62e > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + > > drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 10 +- > > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- > > drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 108 > ++++++++++++++++++++++ > > drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +- > > 5 files changed, 126 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > index f8657c3..193db70 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > @@ -417,6 +417,8 @@ amdgpu_gem_prime_import_sg_table(struct > drm_device *dev, > > struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, > > struct drm_gem_object *gobj, > > int flags); > > +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device > *dev, > > + struct dma_buf *dma_buf); > > int amdgpu_gem_prime_pin(struct drm_gem_object *obj); > > void amdgpu_gem_prime_unpin(struct drm_gem_object *obj); > > struct reservation_object *amdgpu_gem_prime_res_obj(struct > > drm_gem_object *); diff --git > > a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c > > index d704a45..b5483e4 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c > > @@ -147,6 +147,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc > *crtc, > > struct drm_device *dev = crtc->dev; > > struct amdgpu_device *adev = dev->dev_private; > > struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); > > + bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev- > >flags > > +& AMD_IS_APU); > > struct amdgpu_framebuffer *old_amdgpu_fb; > > struct amdgpu_framebuffer *new_amdgpu_fb; > > struct drm_gem_object *obj; > > @@ -190,8 +191,13 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc > > *crtc, > > > > r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_VRAM, > &base); > > if (unlikely(r != 0)) { > > - DRM_ERROR("failed to pin new abo buffer before flip\n"); > > - goto unreserve; > > + /* latest APUs support gtt scan out */ > > + if (gtt_scannable) > > + r = amdgpu_bo_pin(new_abo, > AMDGPU_GEM_DOMAIN_GTT, &base); > > + if (unlikely(r != 0)) { > > + DRM_ERROR("failed to pin new abo buffer before > flip\n"); > > + goto unreserve; > > + } > > } > > > > r = reservation_object_get_fences_rcu(new_abo->tbo.resv, > > &work->excl, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > index 31383e0..df30b08 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > @@ -868,7 +868,7 @@ static struct drm_driver kms_driver = { > > .prime_handle_to_fd = drm_gem_prime_handle_to_fd, > > .prime_fd_to_handle = drm_gem_prime_fd_to_handle, > > .gem_prime_export = amdgpu_gem_prime_export, > > - .gem_prime_import = drm_gem_prime_import, > > + .gem_prime_import = amdgpu_gem_prime_import, > > .gem_prime_pin = amdgpu_gem_prime_pin, > > .gem_prime_unpin = amdgpu_gem_prime_unpin, > > .gem_prime_res_obj = amdgpu_gem_prime_res_obj, diff --git > > a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > > index ae9c106..9e1424d 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > > @@ -164,6 +164,82 @@ struct reservation_object > *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) > > return bo->tbo.resv; > > } > > > > +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, > enum > > +dma_data_direction direction) { > > + struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); > > + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > > + long i, ret = 0; > > + unsigned old_count; > > + bool reads = (direction == DMA_BIDIRECTIONAL || direction == > DMA_FROM_DEVICE); > > + bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev- > >flags & AMD_IS_APU); > > + u32 domain; > > + > > + if (!reads || !gtt_scannable) > > + return 0; > > + > > + ret = amdgpu_bo_reserve(bo, false); > > + if (unlikely(ret != 0)) > > + return ret; > > + > > + /* > > + * Wait for all shared fences to complete before we switch to future > > + * use of exclusive fence on this prime shared bo. > > + */ > > + ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false, > > + MAX_SCHEDULE_TIMEOUT); > > + > > + if (unlikely(ret < 0)) { > > + DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret); > > + amdgpu_bo_unreserve(bo); > > + return ret; > > + } > > + > > + ret = 0; > > + /* Pin to gtt */ > > + domain = amdgpu_mem_type_to_domain(bo- > >tbo.mem.mem_type); > > + if (domain == AMDGPU_GEM_DOMAIN_VRAM) { > > + old_count = bo->pin_count; > > + for (i = 0; i < old_count; i++) > > + amdgpu_bo_unpin(bo); > > + for (i = 0; i < old_count; i++) { > > + ret = amdgpu_bo_pin(bo, > AMDGPU_GEM_DOMAIN_GTT, NULL); > > + if (unlikely(ret != 0)) > > + break; > > + } > > + } > > + if (ret == 0) > > + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, > NULL); > > + > > + amdgpu_bo_unreserve(bo); > > + return ret; > > +} > > + > > +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf, > enum > > +dma_data_direction direction) { > > + struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); > > + int ret = 0; > > + bool reads = (direction == DMA_BIDIRECTIONAL || direction == > DMA_FROM_DEVICE); > > + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > > + bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev- > >flags > > +& AMD_IS_APU); > > + > > + if (!reads || !gtt_scannable) > > + return 0; > > + > > + mb(); > > + ret = amdgpu_bo_reserve(bo, true); > > + if (unlikely(ret != 0)) > > + return ret; > > + > > + amdgpu_bo_unpin(bo); > > + > > + amdgpu_bo_unreserve(bo); > > + > > + return 0; > > +} > > + > > +static struct dma_buf_ops amdgpu_dmabuf_ops; static atomic_t > > +aops_lock; > > + > > struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, > > struct drm_gem_object *gobj, > > int flags) > > @@ -178,5 +254,37 @@ struct dma_buf > *amdgpu_gem_prime_export(struct drm_device *dev, > > buf = drm_gem_prime_export(dev, gobj, flags); > > if (!IS_ERR(buf)) > > buf->file->f_mapping = dev->anon_inode->i_mapping; > > + > > + while (amdgpu_dmabuf_ops.begin_cpu_access != > amdgpu_gem_begin_cpu_access || > > + amdgpu_dmabuf_ops.end_cpu_access != > amdgpu_gem_end_cpu_access ) > > + { > > + if (!atomic_cmpxchg(&aops_lock, 0, 1)) { > > + amdgpu_dmabuf_ops = *(buf->ops); > > + amdgpu_dmabuf_ops.begin_cpu_access = > amdgpu_gem_begin_cpu_access; > > + amdgpu_dmabuf_ops.end_cpu_access = > amdgpu_gem_end_cpu_access; > > + } > > + } > > + buf->ops = &amdgpu_dmabuf_ops; > > + > > return buf; > > } > > + > > +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device > *dev, > > + struct dma_buf *dma_buf) > > +{ > > + struct drm_gem_object *obj; > > + > > + if (dma_buf->ops == &amdgpu_dmabuf_ops) { > > + obj = dma_buf->priv; > > + if (obj->dev == dev) { > > + /* > > + * Importing dmabuf exported from out own gem > increases > > + * refcount on gem itself instead of f_count of > dmabuf. > > + */ > > + drm_gem_object_get(obj); > > + return obj; > > + } > > + } > > + > > + return drm_gem_prime_import(dev, dma_buf); } > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > index a3bf021..f25b830 100644 > > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > @@ -2987,6 +2987,8 @@ static int dm_plane_helper_prepare_fb(struct > drm_plane *plane, > > int r; > > struct dm_plane_state *dm_plane_state_new, > *dm_plane_state_old; > > unsigned int awidth; > > + struct amdgpu_device *adev = plane->dev->dev_private; > > + bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev- > >flags > > +& AMD_IS_APU); > > > > dm_plane_state_old = to_dm_plane_state(plane->state); > > dm_plane_state_new = to_dm_plane_state(new_state); @@ - > 3005,7 > > +3007,11 @@ static int dm_plane_helper_prepare_fb(struct drm_plane > *plane, > > return r; > > > > r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &afb- > >address); > > - > > + if (unlikely(r != 0)) { > > + /* latest APUs support gtt scan out */ > > + if (gtt_scannable) > > + r = amdgpu_bo_pin(rbo, > AMDGPU_GEM_DOMAIN_GTT, &afb->address); > > + } > > > > amdgpu_bo_unreserve(rbo); > >