To explain the operations a little bit, the tests include repeated testing of the following sequences, 91 const int sequences[4][4] = { 92 { STEP_MMAP, STEP_FAULT, STEP_FLIP, STEP_DRAW }, 93 { STEP_MMAP, STEP_FLIP, STEP_DRAW, STEP_SKIP }, 94 { STEP_MMAP, STEP_DRAW, STEP_FLIP, STEP_SKIP }, 95 { STEP_FLIP, STEP_MMAP, STEP_DRAW, STEP_SKIP }, 96 }; Here STEP_MMAP includes prime_mmap() and begin_cpu_access(). STEP_DRAW includes repeated cpu reads/writes. It looks to me the dma_buf has to be pinned to gtt here, to prevent it being pinned back by STEP_FLIP before drawing. > I've send out a WIP branch a for this a few weeks ago, but haven't worked on > this in a while. BTW it is only supported on Carizzo and Raven. Can you show me the branch? Looks like we need to get if finished. Sam > -----Original Message----- > From: Koenig, Christian > Sent: Wednesday, November 29, 2017 10:01 AM > To: Li, Samuel <Samuel.Li at amd.com>; amd-gfx at lists.freedesktop.org > Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma > buf. > > > What is the concern for scanning out from GTT on APUs? > It's simply not implemented yet. You need quite a bunch of different setup in > DC for this to work. > > I've send out a WIP branch a for this a few weeks ago, but haven't worked on > this in a while. BTW it is only supported on Carizzo and Raven. > > But even then pinning a BO to GTT for this would still be a no-go. We could > just avoid the copy on scanout when the BO is already inside GTT because of > the CPU access. > > In general we should rather work on this as Michel described and avoid > creating the BO in VRAM in the first place if possible. > > Regards, > Christian. > > Am 29.11.2017 um 15:56 schrieb Li, Samuel: > > One major purpose of the ChromeOS mmap_test is to avoid buffer copying. > What is the concern for scanning out from GTT on APUs? > > > > Sam > > > >> -----Original Message----- > >> From: Koenig, Christian > >> Sent: Wednesday, November 29, 2017 9:54 AM > >> To: Li, Samuel <Samuel.Li at amd.com>; amd-gfx at lists.freedesktop.org > >> Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses > >> dma buf. > >> > >> And exactly that's the reason why it is a no-go. > >> > >> Scanning out from GTT isn't supported at the moment. > >> > >> Christian. > >> > >> Am 29.11.2017 um 15:48 schrieb Li, Samuel: > >>> Actually it needs to be pinned here, since otherwise page flip will > >>> pin it into > >> vram. > >>> SAm > >>> > >>> > >>>> -----Original Message----- > >>>> From: Christian König [mailto:ckoenig.leichtzumerken at gmail.com] > >>>> Sent: Wednesday, November 29, 2017 4:39 AM > >>>> To: Li, Samuel <Samuel.Li at amd.com>; amd-gfx at lists.freedesktop.org > >>>> Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses > >>>> dma buf. > >>>> > >>>> Am 29.11.2017 um 01:20 schrieb Samuel Li: > >>>>> To improve cpu read performance. This is implemented for APUs > >> currently. > >>>> And once more a NAK for this approach. > >>>> > >>>> What we could do is migrate the BO to GTT during mmap, but pinning > >>>> it is out of question. > >>>> > >>>> Regards, > >>>> Christian. > >>>> > >>>>> Change-Id: I300a7daed8f2b0ba6be71a43196a6b8617ddc62e > >>>>> --- > >>>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + > >>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 10 +- > >>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- > >>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 108 > >>>> ++++++++++++++++++++++ > >>>>> drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +- > >>>>> 5 files changed, 126 insertions(+), 4 deletions(-) > >>>>> > >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > >>>>> index f8657c3..193db70 100644 > >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > >>>>> @@ -417,6 +417,8 @@ amdgpu_gem_prime_import_sg_table(struct > >>>> drm_device *dev, > >>>>> struct dma_buf *amdgpu_gem_prime_export(struct drm_device > *dev, > >>>>> struct drm_gem_object > *gobj, > >>>>> int flags); > >>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct > >> drm_device > >>>> *dev, > >>>>> + struct dma_buf *dma_buf); > >>>>> int amdgpu_gem_prime_pin(struct drm_gem_object *obj); > >>>>> void amdgpu_gem_prime_unpin(struct drm_gem_object *obj); > >>>>> struct reservation_object *amdgpu_gem_prime_res_obj(struct > >>>>> drm_gem_object *); diff --git > >>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c > >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c > >>>>> index d704a45..b5483e4 100644 > >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c > >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c > >>>>> @@ -147,6 +147,7 @@ int amdgpu_crtc_page_flip_target(struct > >> drm_crtc > >>>> *crtc, > >>>>> struct drm_device *dev = crtc->dev; > >>>>> struct amdgpu_device *adev = dev->dev_private; > >>>>> struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); > >>>>> + bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && > adev- > >>>>> flags > >>>>> +& AMD_IS_APU); > >>>>> struct amdgpu_framebuffer *old_amdgpu_fb; > >>>>> struct amdgpu_framebuffer *new_amdgpu_fb; > >>>>> struct drm_gem_object *obj; > >>>>> @@ -190,8 +191,13 @@ int amdgpu_crtc_page_flip_target(struct > >>>>> drm_crtc *crtc, > >>>>> > >>>>> r = amdgpu_bo_pin(new_abo, > AMDGPU_GEM_DOMAIN_VRAM, > >>>> &base); > >>>>> if (unlikely(r != 0)) { > >>>>> - DRM_ERROR("failed to pin new abo buffer before > flip\n"); > >>>>> - goto unreserve; > >>>>> + /* latest APUs support gtt scan out */ > >>>>> + if (gtt_scannable) > >>>>> + r = amdgpu_bo_pin(new_abo, > >>>> AMDGPU_GEM_DOMAIN_GTT, &base); > >>>>> + if (unlikely(r != 0)) { > >>>>> + DRM_ERROR("failed to pin new abo buffer > before > >>>> flip\n"); > >>>>> + goto unreserve; > >>>>> + } > >>>>> } > >>>>> > >>>>> r = reservation_object_get_fences_rcu(new_abo->tbo.resv, > >>>>> &work->excl, diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > >>>>> index 31383e0..df30b08 100644 > >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > >>>>> @@ -868,7 +868,7 @@ static struct drm_driver kms_driver = { > >>>>> .prime_handle_to_fd = drm_gem_prime_handle_to_fd, > >>>>> .prime_fd_to_handle = drm_gem_prime_fd_to_handle, > >>>>> .gem_prime_export = amdgpu_gem_prime_export, > >>>>> - .gem_prime_import = drm_gem_prime_import, > >>>>> + .gem_prime_import = amdgpu_gem_prime_import, > >>>>> .gem_prime_pin = amdgpu_gem_prime_pin, > >>>>> .gem_prime_unpin = amdgpu_gem_prime_unpin, > >>>>> .gem_prime_res_obj = amdgpu_gem_prime_res_obj, diff -- > git > >>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > >>>>> index ae9c106..9e1424d 100644 > >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > >>>>> @@ -164,6 +164,82 @@ struct reservation_object > >>>> *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) > >>>>> return bo->tbo.resv; > >>>>> } > >>>>> > >>>>> +static int amdgpu_gem_begin_cpu_access(struct dma_buf > *dma_buf, > >>>> enum > >>>>> +dma_data_direction direction) { > >>>>> + struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf- > >priv); > >>>>> + struct amdgpu_device *adev = amdgpu_ttm_adev(bo- > >tbo.bdev); > >>>>> + long i, ret = 0; > >>>>> + unsigned old_count; > >>>>> + bool reads = (direction == DMA_BIDIRECTIONAL || direction > == > >>>> DMA_FROM_DEVICE); > >>>>> + bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && > adev- > >>>>> flags & AMD_IS_APU); > >>>>> + u32 domain; > >>>>> + > >>>>> + if (!reads || !gtt_scannable) > >>>>> + return 0; > >>>>> + > >>>>> + ret = amdgpu_bo_reserve(bo, false); > >>>>> + if (unlikely(ret != 0)) > >>>>> + return ret; > >>>>> + > >>>>> + /* > >>>>> + * Wait for all shared fences to complete before we switch to > future > >>>>> + * use of exclusive fence on this prime shared bo. > >>>>> + */ > >>>>> + ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, > true, false, > >>>>> + > MAX_SCHEDULE_TIMEOUT); > >>>>> + > >>>>> + if (unlikely(ret < 0)) { > >>>>> + DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret); > >>>>> + amdgpu_bo_unreserve(bo); > >>>>> + return ret; > >>>>> + } > >>>>> + > >>>>> + ret = 0; > >>>>> + /* Pin to gtt */ > >>>>> + domain = amdgpu_mem_type_to_domain(bo- > >>>>> tbo.mem.mem_type); > >>>>> + if (domain == AMDGPU_GEM_DOMAIN_VRAM) { > >>>>> + old_count = bo->pin_count; > >>>>> + for (i = 0; i < old_count; i++) > >>>>> + amdgpu_bo_unpin(bo); > >>>>> + for (i = 0; i < old_count; i++) { > >>>>> + ret = amdgpu_bo_pin(bo, > >>>> AMDGPU_GEM_DOMAIN_GTT, NULL); > >>>>> + if (unlikely(ret != 0)) > >>>>> + break; > >>>>> + } > >>>>> + } > >>>>> + if (ret == 0) > >>>>> + ret = amdgpu_bo_pin(bo, > AMDGPU_GEM_DOMAIN_GTT, > >>>> NULL); > >>>>> + > >>>>> + amdgpu_bo_unreserve(bo); > >>>>> + return ret; > >>>>> +} > >>>>> + > >>>>> +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf, > >>>> enum > >>>>> +dma_data_direction direction) { > >>>>> + struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf- > >priv); > >>>>> + int ret = 0; > >>>>> + bool reads = (direction == DMA_BIDIRECTIONAL || direction > == > >>>> DMA_FROM_DEVICE); > >>>>> + struct amdgpu_device *adev = amdgpu_ttm_adev(bo- > >tbo.bdev); > >>>>> + bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && > adev- > >>>>> flags > >>>>> +& AMD_IS_APU); > >>>>> + > >>>>> + if (!reads || !gtt_scannable) > >>>>> + return 0; > >>>>> + > >>>>> + mb(); > >>>>> + ret = amdgpu_bo_reserve(bo, true); > >>>>> + if (unlikely(ret != 0)) > >>>>> + return ret; > >>>>> + > >>>>> + amdgpu_bo_unpin(bo); > >>>>> + > >>>>> + amdgpu_bo_unreserve(bo); > >>>>> + > >>>>> + return 0; > >>>>> +} > >>>>> + > >>>>> +static struct dma_buf_ops amdgpu_dmabuf_ops; static atomic_t > >>>>> +aops_lock; > >>>>> + > >>>>> struct dma_buf *amdgpu_gem_prime_export(struct drm_device > *dev, > >>>>> struct drm_gem_object > *gobj, > >>>>> int flags) > >>>>> @@ -178,5 +254,37 @@ struct dma_buf > >>>> *amdgpu_gem_prime_export(struct drm_device *dev, > >>>>> buf = drm_gem_prime_export(dev, gobj, flags); > >>>>> if (!IS_ERR(buf)) > >>>>> buf->file->f_mapping = dev->anon_inode- > >i_mapping; > >>>>> + > >>>>> + while (amdgpu_dmabuf_ops.begin_cpu_access != > >>>> amdgpu_gem_begin_cpu_access || > >>>>> + amdgpu_dmabuf_ops.end_cpu_access != > >>>> amdgpu_gem_end_cpu_access ) > >>>>> + { > >>>>> + if (!atomic_cmpxchg(&aops_lock, 0, 1)) { > >>>>> + amdgpu_dmabuf_ops = *(buf->ops); > >>>>> + amdgpu_dmabuf_ops.begin_cpu_access = > >>>> amdgpu_gem_begin_cpu_access; > >>>>> + amdgpu_dmabuf_ops.end_cpu_access = > >>>> amdgpu_gem_end_cpu_access; > >>>>> + } > >>>>> + } > >>>>> + buf->ops = &amdgpu_dmabuf_ops; > >>>>> + > >>>>> return buf; > >>>>> } > >>>>> + > >>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct > >> drm_device > >>>> *dev, > >>>>> + struct dma_buf *dma_buf) > { > >>>>> + struct drm_gem_object *obj; > >>>>> + > >>>>> + if (dma_buf->ops == &amdgpu_dmabuf_ops) { > >>>>> + obj = dma_buf->priv; > >>>>> + if (obj->dev == dev) { > >>>>> + /* > >>>>> + * Importing dmabuf exported from out own > gem > >>>> increases > >>>>> + * refcount on gem itself instead of f_count > of > >>>> dmabuf. > >>>>> + */ > >>>>> + drm_gem_object_get(obj); > >>>>> + return obj; > >>>>> + } > >>>>> + } > >>>>> + > >>>>> + return drm_gem_prime_import(dev, dma_buf); } > >>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > >>>>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > >>>>> index a3bf021..f25b830 100644 > >>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > >>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > >>>>> @@ -2987,6 +2987,8 @@ static int > dm_plane_helper_prepare_fb(struct > >>>> drm_plane *plane, > >>>>> int r; > >>>>> struct dm_plane_state *dm_plane_state_new, > >>>> *dm_plane_state_old; > >>>>> unsigned int awidth; > >>>>> + struct amdgpu_device *adev = plane->dev->dev_private; > >>>>> + bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && > adev- > >>>>> flags > >>>>> +& AMD_IS_APU); > >>>>> > >>>>> dm_plane_state_old = to_dm_plane_state(plane->state); > >>>>> dm_plane_state_new = to_dm_plane_state(new_state); > @@ - > >>>> 3005,7 > >>>>> +3007,11 @@ static int dm_plane_helper_prepare_fb(struct > drm_plane > >>>> *plane, > >>>>> return r; > >>>>> > >>>>> r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, > &afb- > >> address); > >>>>> - > >>>>> + if (unlikely(r != 0)) { > >>>>> + /* latest APUs support gtt scan out */ > >>>>> + if (gtt_scannable) > >>>>> + r = amdgpu_bo_pin(rbo, > >>>> AMDGPU_GEM_DOMAIN_GTT, &afb->address); > >>>>> + } > >>>>> > >>>>> amdgpu_bo_unreserve(rbo); > >>>>>