One major purpose of the ChromeOS mmap_test is to avoid buffer copying. What is the concern for scanning out from GTT on APUs? Sam > -----Original Message----- > From: Koenig, Christian > Sent: Wednesday, November 29, 2017 9:54 AM > To: Li, Samuel <Samuel.Li at amd.com>; amd-gfx at lists.freedesktop.org > Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma > buf. > > And exactly that's the reason why it is a no-go. > > Scanning out from GTT isn't supported at the moment. > > Christian. > > Am 29.11.2017 um 15:48 schrieb Li, Samuel: > > Actually it needs to be pinned here, since otherwise page flip will pin it into > vram. > > > > SAm > > > > > >> -----Original Message----- > >> From: Christian König [mailto:ckoenig.leichtzumerken at gmail.com] > >> Sent: Wednesday, November 29, 2017 4:39 AM > >> To: Li, Samuel <Samuel.Li at amd.com>; amd-gfx at lists.freedesktop.org > >> Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses > >> dma buf. > >> > >> Am 29.11.2017 um 01:20 schrieb Samuel Li: > >>> To improve cpu read performance. This is implemented for APUs > currently. > >> And once more a NAK for this approach. > >> > >> What we could do is migrate the BO to GTT during mmap, but pinning it > >> is out of question. > >> > >> Regards, > >> Christian. > >> > >>> Change-Id: I300a7daed8f2b0ba6be71a43196a6b8617ddc62e > >>> --- > >>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + > >>> drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 10 +- > >>> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- > >>> drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 108 > >> ++++++++++++++++++++++ > >>> drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +- > >>> 5 files changed, 126 insertions(+), 4 deletions(-) > >>> > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > >>> index f8657c3..193db70 100644 > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > >>> @@ -417,6 +417,8 @@ amdgpu_gem_prime_import_sg_table(struct > >> drm_device *dev, > >>> struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, > >>> struct drm_gem_object *gobj, > >>> int flags); > >>> +struct drm_gem_object *amdgpu_gem_prime_import(struct > drm_device > >> *dev, > >>> + struct dma_buf *dma_buf); > >>> int amdgpu_gem_prime_pin(struct drm_gem_object *obj); > >>> void amdgpu_gem_prime_unpin(struct drm_gem_object *obj); > >>> struct reservation_object *amdgpu_gem_prime_res_obj(struct > >>> drm_gem_object *); diff --git > >>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c > >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c > >>> index d704a45..b5483e4 100644 > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c > >>> @@ -147,6 +147,7 @@ int amdgpu_crtc_page_flip_target(struct > drm_crtc > >> *crtc, > >>> struct drm_device *dev = crtc->dev; > >>> struct amdgpu_device *adev = dev->dev_private; > >>> struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); > >>> + bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev- > >>> flags > >>> +& AMD_IS_APU); > >>> struct amdgpu_framebuffer *old_amdgpu_fb; > >>> struct amdgpu_framebuffer *new_amdgpu_fb; > >>> struct drm_gem_object *obj; > >>> @@ -190,8 +191,13 @@ int amdgpu_crtc_page_flip_target(struct > >>> drm_crtc *crtc, > >>> > >>> r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_VRAM, > >> &base); > >>> if (unlikely(r != 0)) { > >>> - DRM_ERROR("failed to pin new abo buffer before flip\n"); > >>> - goto unreserve; > >>> + /* latest APUs support gtt scan out */ > >>> + if (gtt_scannable) > >>> + r = amdgpu_bo_pin(new_abo, > >> AMDGPU_GEM_DOMAIN_GTT, &base); > >>> + if (unlikely(r != 0)) { > >>> + DRM_ERROR("failed to pin new abo buffer before > >> flip\n"); > >>> + goto unreserve; > >>> + } > >>> } > >>> > >>> r = reservation_object_get_fences_rcu(new_abo->tbo.resv, > >>> &work->excl, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > >>> index 31383e0..df30b08 100644 > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > >>> @@ -868,7 +868,7 @@ static struct drm_driver kms_driver = { > >>> .prime_handle_to_fd = drm_gem_prime_handle_to_fd, > >>> .prime_fd_to_handle = drm_gem_prime_fd_to_handle, > >>> .gem_prime_export = amdgpu_gem_prime_export, > >>> - .gem_prime_import = drm_gem_prime_import, > >>> + .gem_prime_import = amdgpu_gem_prime_import, > >>> .gem_prime_pin = amdgpu_gem_prime_pin, > >>> .gem_prime_unpin = amdgpu_gem_prime_unpin, > >>> .gem_prime_res_obj = amdgpu_gem_prime_res_obj, diff --git > >>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > >>> index ae9c106..9e1424d 100644 > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > >>> @@ -164,6 +164,82 @@ struct reservation_object > >> *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) > >>> return bo->tbo.resv; > >>> } > >>> > >>> +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, > >> enum > >>> +dma_data_direction direction) { > >>> + struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); > >>> + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > >>> + long i, ret = 0; > >>> + unsigned old_count; > >>> + bool reads = (direction == DMA_BIDIRECTIONAL || direction == > >> DMA_FROM_DEVICE); > >>> + bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev- > >>> flags & AMD_IS_APU); > >>> + u32 domain; > >>> + > >>> + if (!reads || !gtt_scannable) > >>> + return 0; > >>> + > >>> + ret = amdgpu_bo_reserve(bo, false); > >>> + if (unlikely(ret != 0)) > >>> + return ret; > >>> + > >>> + /* > >>> + * Wait for all shared fences to complete before we switch to future > >>> + * use of exclusive fence on this prime shared bo. > >>> + */ > >>> + ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false, > >>> + MAX_SCHEDULE_TIMEOUT); > >>> + > >>> + if (unlikely(ret < 0)) { > >>> + DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret); > >>> + amdgpu_bo_unreserve(bo); > >>> + return ret; > >>> + } > >>> + > >>> + ret = 0; > >>> + /* Pin to gtt */ > >>> + domain = amdgpu_mem_type_to_domain(bo- > >>> tbo.mem.mem_type); > >>> + if (domain == AMDGPU_GEM_DOMAIN_VRAM) { > >>> + old_count = bo->pin_count; > >>> + for (i = 0; i < old_count; i++) > >>> + amdgpu_bo_unpin(bo); > >>> + for (i = 0; i < old_count; i++) { > >>> + ret = amdgpu_bo_pin(bo, > >> AMDGPU_GEM_DOMAIN_GTT, NULL); > >>> + if (unlikely(ret != 0)) > >>> + break; > >>> + } > >>> + } > >>> + if (ret == 0) > >>> + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, > >> NULL); > >>> + > >>> + amdgpu_bo_unreserve(bo); > >>> + return ret; > >>> +} > >>> + > >>> +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf, > >> enum > >>> +dma_data_direction direction) { > >>> + struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); > >>> + int ret = 0; > >>> + bool reads = (direction == DMA_BIDIRECTIONAL || direction == > >> DMA_FROM_DEVICE); > >>> + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > >>> + bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev- > >>> flags > >>> +& AMD_IS_APU); > >>> + > >>> + if (!reads || !gtt_scannable) > >>> + return 0; > >>> + > >>> + mb(); > >>> + ret = amdgpu_bo_reserve(bo, true); > >>> + if (unlikely(ret != 0)) > >>> + return ret; > >>> + > >>> + amdgpu_bo_unpin(bo); > >>> + > >>> + amdgpu_bo_unreserve(bo); > >>> + > >>> + return 0; > >>> +} > >>> + > >>> +static struct dma_buf_ops amdgpu_dmabuf_ops; static atomic_t > >>> +aops_lock; > >>> + > >>> struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, > >>> struct drm_gem_object *gobj, > >>> int flags) > >>> @@ -178,5 +254,37 @@ struct dma_buf > >> *amdgpu_gem_prime_export(struct drm_device *dev, > >>> buf = drm_gem_prime_export(dev, gobj, flags); > >>> if (!IS_ERR(buf)) > >>> buf->file->f_mapping = dev->anon_inode->i_mapping; > >>> + > >>> + while (amdgpu_dmabuf_ops.begin_cpu_access != > >> amdgpu_gem_begin_cpu_access || > >>> + amdgpu_dmabuf_ops.end_cpu_access != > >> amdgpu_gem_end_cpu_access ) > >>> + { > >>> + if (!atomic_cmpxchg(&aops_lock, 0, 1)) { > >>> + amdgpu_dmabuf_ops = *(buf->ops); > >>> + amdgpu_dmabuf_ops.begin_cpu_access = > >> amdgpu_gem_begin_cpu_access; > >>> + amdgpu_dmabuf_ops.end_cpu_access = > >> amdgpu_gem_end_cpu_access; > >>> + } > >>> + } > >>> + buf->ops = &amdgpu_dmabuf_ops; > >>> + > >>> return buf; > >>> } > >>> + > >>> +struct drm_gem_object *amdgpu_gem_prime_import(struct > drm_device > >> *dev, > >>> + struct dma_buf *dma_buf) > >>> +{ > >>> + struct drm_gem_object *obj; > >>> + > >>> + if (dma_buf->ops == &amdgpu_dmabuf_ops) { > >>> + obj = dma_buf->priv; > >>> + if (obj->dev == dev) { > >>> + /* > >>> + * Importing dmabuf exported from out own gem > >> increases > >>> + * refcount on gem itself instead of f_count of > >> dmabuf. > >>> + */ > >>> + drm_gem_object_get(obj); > >>> + return obj; > >>> + } > >>> + } > >>> + > >>> + return drm_gem_prime_import(dev, dma_buf); } > >>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > >>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > >>> index a3bf021..f25b830 100644 > >>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > >>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > >>> @@ -2987,6 +2987,8 @@ static int dm_plane_helper_prepare_fb(struct > >> drm_plane *plane, > >>> int r; > >>> struct dm_plane_state *dm_plane_state_new, > >> *dm_plane_state_old; > >>> unsigned int awidth; > >>> + struct amdgpu_device *adev = plane->dev->dev_private; > >>> + bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev- > >>> flags > >>> +& AMD_IS_APU); > >>> > >>> dm_plane_state_old = to_dm_plane_state(plane->state); > >>> dm_plane_state_new = to_dm_plane_state(new_state); @@ - > >> 3005,7 > >>> +3007,11 @@ static int dm_plane_helper_prepare_fb(struct drm_plane > >> *plane, > >>> return r; > >>> > >>> r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &afb- > address); > >>> - > >>> + if (unlikely(r != 0)) { > >>> + /* latest APUs support gtt scan out */ > >>> + if (gtt_scannable) > >>> + r = amdgpu_bo_pin(rbo, > >> AMDGPU_GEM_DOMAIN_GTT, &afb->address); > >>> + } > >>> > >>> amdgpu_bo_unreserve(rbo); > >>>