> What is the concern for scanning out from GTT on APUs?

It's simply not implemented yet. You need quite a bunch of different setup in DC for this to work. I've sent out a WIP branch for this a few weeks ago, but haven't worked on it in a while.

BTW, it is only supported on Carrizo and Raven. But even then, pinning a BO to GTT for this would still be a no-go. We could just avoid the copy on scanout when the BO is already inside GTT because of the CPU access.

In general we should rather work on this as Michel described and avoid creating the BO in VRAM in the first place if possible.

Regards,
Christian.

On 29.11.2017 at 15:56, Li, Samuel wrote:
> One major purpose of the ChromeOS mmap_test is to avoid buffer copying. What is the concern for scanning out from GTT on APUs?
>
> Sam
>
>> -----Original Message-----
>> From: Koenig, Christian
>> Sent: Wednesday, November 29, 2017 9:54 AM
>> To: Li, Samuel <Samuel.Li at amd.com>; amd-gfx at lists.freedesktop.org
>> Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma buf.
>>
>> And that's exactly the reason why it is a no-go.
>>
>> Scanning out from GTT isn't supported at the moment.
>>
>> Christian.
>>
>> On 29.11.2017 at 15:48, Li, Samuel wrote:
>>> Actually it needs to be pinned here, since otherwise page flip will pin it into vram.
>>>
>>> Sam
>>>
>>>> -----Original Message-----
>>>> From: Christian König [mailto:ckoenig.leichtzumerken at gmail.com]
>>>> Sent: Wednesday, November 29, 2017 4:39 AM
>>>> To: Li, Samuel <Samuel.Li at amd.com>; amd-gfx at lists.freedesktop.org
>>>> Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma buf.
>>>>
>>>> On 29.11.2017 at 01:20, Samuel Li wrote:
>>>>> To improve cpu read performance. This is implemented for APUs currently.
>>>>
>>>> And once more a NAK for this approach.
>>>>
>>>> What we could do is migrate the BO to GTT during mmap, but pinning it is out of the question.
>>>>
>>>> Regards,
>>>> Christian.
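As an illustration of the "migrate the BO to GTT instead of pinning it" point quoted above, here is a minimal sketch. It is not part of the patch: amdgpu_bo_migrate_to_gtt() is a hypothetical helper name, amdgpu_ttm_placement_from_domain() is assumed to be available as in the amdgpu code of that era, and ttm_bo_validate() is assumed to still have its pre-ttm_operation_ctx signature (interruptible/no_wait_gpu bools), which matches this kernel version.

/*
 * Hypothetical helper (not in the patch): move a BO into GTT for CPU
 * access without taking a pin reference, along the lines of "migrate
 * the BO to GTT during mmap, but don't pin it".
 */
static int amdgpu_bo_migrate_to_gtt(struct amdgpu_bo *bo)
{
	int r;

	r = amdgpu_bo_reserve(bo, false);
	if (unlikely(r != 0))
		return r;

	/*
	 * Re-validate the BO with a GTT-only placement; TTM moves the
	 * backing store out of VRAM if necessary. No pin count is taken,
	 * so the BO can still be evicted or moved later.
	 */
	amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);

	amdgpu_bo_unreserve(bo);
	return r;
}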
>>>>
>>>>> Change-Id: I300a7daed8f2b0ba6be71a43196a6b8617ddc62e
>>>>> ---
>>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu.h                |   2 +
>>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c        |  10 +-
>>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c            |   2 +-
>>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c          | 108 ++++++++++++++++++++++
>>>>>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  |   8 +-
>>>>>  5 files changed, 126 insertions(+), 4 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> index f8657c3..193db70 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> @@ -417,6 +417,8 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
>>>>>  struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>>>>>  					struct drm_gem_object *gobj,
>>>>>  					int flags);
>>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
>>>>> +					       struct dma_buf *dma_buf);
>>>>>  int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
>>>>>  void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
>>>>>  struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
>>>>> index d704a45..b5483e4 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
>>>>> @@ -147,6 +147,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
>>>>>  	struct drm_device *dev = crtc->dev;
>>>>>  	struct amdgpu_device *adev = dev->dev_private;
>>>>>  	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
>>>>> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
>>>>>  	struct amdgpu_framebuffer *old_amdgpu_fb;
>>>>>  	struct amdgpu_framebuffer *new_amdgpu_fb;
>>>>>  	struct drm_gem_object *obj;
>>>>> @@ -190,8 +191,13 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
>>>>>
>>>>>  	r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_VRAM, &base);
>>>>>  	if (unlikely(r != 0)) {
>>>>> -		DRM_ERROR("failed to pin new abo buffer before flip\n");
>>>>> -		goto unreserve;
>>>>> +		/* latest APUs support gtt scan out */
>>>>> +		if (gtt_scannable)
>>>>> +			r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_GTT, &base);
>>>>> +		if (unlikely(r != 0)) {
>>>>> +			DRM_ERROR("failed to pin new abo buffer before flip\n");
>>>>> +			goto unreserve;
>>>>> +		}
>>>>>  	}
>>>>>
>>>>>  	r = reservation_object_get_fences_rcu(new_abo->tbo.resv, &work->excl,
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>> index 31383e0..df30b08 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>> @@ -868,7 +868,7 @@ static struct drm_driver kms_driver = {
>>>>>  	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
>>>>>  	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
>>>>>  	.gem_prime_export = amdgpu_gem_prime_export,
>>>>> -	.gem_prime_import = drm_gem_prime_import,
>>>>> +	.gem_prime_import = amdgpu_gem_prime_import,
>>>>>  	.gem_prime_pin = amdgpu_gem_prime_pin,
>>>>>  	.gem_prime_unpin = amdgpu_gem_prime_unpin,
>>>>>  	.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>> index ae9c106..9e1424d 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>> @@ -164,6 +164,82 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
>>>>>  	return bo->tbo.resv;
>>>>>  }
>>>>>
>>>>> +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
>>>>> +{
>>>>> +	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
>>>>> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>>> +	long i, ret = 0;
>>>>> +	unsigned old_count;
>>>>> +	bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE);
>>>>> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
>>>>> +	u32 domain;
>>>>> +
>>>>> +	if (!reads || !gtt_scannable)
>>>>> +		return 0;
>>>>> +
>>>>> +	ret = amdgpu_bo_reserve(bo, false);
>>>>> +	if (unlikely(ret != 0))
>>>>> +		return ret;
>>>>> +
>>>>> +	/*
>>>>> +	 * Wait for all shared fences to complete before we switch to future
>>>>> +	 * use of exclusive fence on this prime shared bo.
>>>>> +	 */
>>>>> +	ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
>>>>> +						  MAX_SCHEDULE_TIMEOUT);
>>>>> +	if (unlikely(ret < 0)) {
>>>>> +		DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret);
>>>>> +		amdgpu_bo_unreserve(bo);
>>>>> +		return ret;
>>>>> +	}
>>>>> +
>>>>> +	ret = 0;
>>>>> +	/* Pin to gtt */
>>>>> +	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
>>>>> +	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
>>>>> +		old_count = bo->pin_count;
>>>>> +		for (i = 0; i < old_count; i++)
>>>>> +			amdgpu_bo_unpin(bo);
>>>>> +		for (i = 0; i < old_count; i++) {
>>>>> +			ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
>>>>> +			if (unlikely(ret != 0))
>>>>> +				break;
>>>>> +		}
>>>>> +	}
>>>>> +	if (ret == 0)
>>>>> +		ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
>>>>> +
>>>>> +	amdgpu_bo_unreserve(bo);
>>>>> +	return ret;
>>>>> +}
>>>>> +
>>>>> +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
>>>>> +{
>>>>> +	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
>>>>> +	int ret = 0;
>>>>> +	bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE);
>>>>> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>>> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
>>>>> +
>>>>> +	if (!reads || !gtt_scannable)
>>>>> +		return 0;
>>>>> +
>>>>> +	mb();
>>>>> +	ret = amdgpu_bo_reserve(bo, true);
>>>>> +	if (unlikely(ret != 0))
>>>>> +		return ret;
>>>>> +
>>>>> +	amdgpu_bo_unpin(bo);
>>>>> +
>>>>> +	amdgpu_bo_unreserve(bo);
>>>>> +
>>>>> +	return 0;
>>>>> +}
>>>>> +
>>>>> +static struct dma_buf_ops amdgpu_dmabuf_ops;
>>>>> +static atomic_t aops_lock;
>>>>> +
>>>>>  struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>>>>>  					struct drm_gem_object *gobj,
>>>>>  					int flags)
>>>>> @@ -178,5 +254,37 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>>>>>  	buf = drm_gem_prime_export(dev, gobj, flags);
>>>>>  	if (!IS_ERR(buf))
>>>>>  		buf->file->f_mapping = dev->anon_inode->i_mapping;
>>>>> +
>>>>> +	while (amdgpu_dmabuf_ops.begin_cpu_access != amdgpu_gem_begin_cpu_access ||
>>>>> +	       amdgpu_dmabuf_ops.end_cpu_access != amdgpu_gem_end_cpu_access)
>>>>> +	{
>>>>> +		if (!atomic_cmpxchg(&aops_lock, 0, 1)) {
>>>>> +			amdgpu_dmabuf_ops = *(buf->ops);
>>>>> +			amdgpu_dmabuf_ops.begin_cpu_access = amdgpu_gem_begin_cpu_access;
>>>>> +			amdgpu_dmabuf_ops.end_cpu_access = amdgpu_gem_end_cpu_access;
>>>>> +		}
>>>>> +	}
>>>>> +	buf->ops = &amdgpu_dmabuf_ops;
>>>>> +
>>>>>  	return buf;
>>>>>  }
>>>>> +
>>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
>>>>> +					       struct dma_buf *dma_buf)
>>>>> +{
>>>>> +	struct drm_gem_object *obj;
>>>>> +
>>>>> +	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
>>>>> +		obj = dma_buf->priv;
>>>>> +		if (obj->dev == dev) {
>>>>> +			/*
>>>>> +			 * Importing a dmabuf exported from our own gem increases
>>>>> +			 * the refcount on the gem itself instead of the f_count
>>>>> +			 * of the dmabuf.
>>>>> +			 */
>>>>> +			drm_gem_object_get(obj);
>>>>> +			return obj;
>>>>> +		}
>>>>> +	}
>>>>> +
>>>>> +	return drm_gem_prime_import(dev, dma_buf);
>>>>> +}
>>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>> index a3bf021..f25b830 100644
>>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>> @@ -2987,6 +2987,8 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
>>>>>  	int r;
>>>>>  	struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old;
>>>>>  	unsigned int awidth;
>>>>> +	struct amdgpu_device *adev = plane->dev->dev_private;
>>>>> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
>>>>>
>>>>>  	dm_plane_state_old = to_dm_plane_state(plane->state);
>>>>>  	dm_plane_state_new = to_dm_plane_state(new_state);
>>>>> @@ -3005,7 +3007,11 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
>>>>>  		return r;
>>>>>
>>>>>  	r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &afb->address);
>>>>> -
>>>>> +	if (unlikely(r != 0)) {
>>>>> +		/* latest APUs support gtt scan out */
>>>>> +		if (gtt_scannable)
>>>>> +			r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_GTT, &afb->address);
>>>>> +	}
>>>>>
>>>>>  	amdgpu_bo_unreserve(rbo);
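To illustrate the "avoid the copy on scanout when the BO is already inside GTT" suggestion at the top of the mail, here is a minimal sketch. amdgpu_fb_pin_domain() is a hypothetical helper, not part of the patch; it only reuses the checks and calls the patch itself relies on, and it only makes sense once GTT scanout is actually wired up in DC for Carrizo/Raven.

/*
 * Hypothetical helper (not in the patch): pick the scanout pin domain
 * from the BO's current placement, so a BO that already lives in GTT
 * because of CPU access is not migrated back into VRAM for the flip.
 */
static u32 amdgpu_fb_pin_domain(struct amdgpu_device *adev,
				struct amdgpu_bo *abo)
{
	bool gtt_scanout = adev->asic_type >= CHIP_CARRIZO &&
			   (adev->flags & AMD_IS_APU);

	if (gtt_scanout &&
	    amdgpu_mem_type_to_domain(abo->tbo.mem.mem_type) ==
	    AMDGPU_GEM_DOMAIN_GTT)
		return AMDGPU_GEM_DOMAIN_GTT;	/* keep the BO where it is */

	return AMDGPU_GEM_DOMAIN_VRAM;		/* default scanout domain */
}

amdgpu_crtc_page_flip_target() and dm_plane_helper_prepare_fb() could then pin once with amdgpu_bo_pin(bo, amdgpu_fb_pin_domain(adev, bo), &address) instead of pinning to VRAM and falling back to GTT only on failure, which would still migrate a GTT-resident BO into VRAM whenever the VRAM pin succeeds.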