On 29.11.2017 at 01:20, Samuel Li wrote:
> To improve CPU read performance. This is implemented for APUs currently.

And once more a NAK for this approach.

What we could do is migrate the BO to GTT during mmap, but pinning it is
out of question (a rough sketch of that idea is appended after the quoted
patch below).

Regards,
Christian.

>
> Change-Id: I300a7daed8f2b0ba6be71a43196a6b8617ddc62e
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h               |   2 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c       |  10 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c           |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c         | 108 ++++++++++++++++++++++
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   8 +-
>  5 files changed, 126 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index f8657c3..193db70 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -417,6 +417,8 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
>  struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>  					struct drm_gem_object *gobj,
>  					int flags);
> +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
> +					       struct dma_buf *dma_buf);
>  int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
>  void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
>  struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> index d704a45..b5483e4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> @@ -147,6 +147,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
>  	struct drm_device *dev = crtc->dev;
>  	struct amdgpu_device *adev = dev->dev_private;
>  	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
>  	struct amdgpu_framebuffer *old_amdgpu_fb;
>  	struct amdgpu_framebuffer *new_amdgpu_fb;
>  	struct drm_gem_object *obj;
> @@ -190,8 +191,13 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
>
>  	r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_VRAM, &base);
>  	if (unlikely(r != 0)) {
> -		DRM_ERROR("failed to pin new abo buffer before flip\n");
> -		goto unreserve;
> +		/* latest APUs support gtt scan out */
> +		if (gtt_scannable)
> +			r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_GTT, &base);
> +		if (unlikely(r != 0)) {
> +			DRM_ERROR("failed to pin new abo buffer before flip\n");
> +			goto unreserve;
> +		}
>  	}
>
>  	r = reservation_object_get_fences_rcu(new_abo->tbo.resv, &work->excl,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 31383e0..df30b08 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -868,7 +868,7 @@ static struct drm_driver kms_driver = {
>  	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
>  	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
>  	.gem_prime_export = amdgpu_gem_prime_export,
> -	.gem_prime_import = drm_gem_prime_import,
> +	.gem_prime_import = amdgpu_gem_prime_import,
>  	.gem_prime_pin = amdgpu_gem_prime_pin,
>  	.gem_prime_unpin = amdgpu_gem_prime_unpin,
>  	.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> index ae9c106..9e1424d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> @@ -164,6 +164,82 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
>  	return bo->tbo.resv;
>  }
>
> +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
> +{
> +	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> +	long i, ret = 0;
> +	unsigned old_count;
> +	bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE);
> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
> +	u32 domain;
> +
> +	if (!reads || !gtt_scannable)
> +		return 0;
> +
> +	ret = amdgpu_bo_reserve(bo, false);
> +	if (unlikely(ret != 0))
> +		return ret;
> +
> +	/*
> +	 * Wait for all shared fences to complete before we switch to future
> +	 * use of exclusive fence on this prime shared bo.
> +	 */
> +	ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
> +						  MAX_SCHEDULE_TIMEOUT);
> +
> +	if (unlikely(ret < 0)) {
> +		DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret);
> +		amdgpu_bo_unreserve(bo);
> +		return ret;
> +	}
> +
> +	ret = 0;
> +	/* Pin to gtt */
> +	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
> +	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
> +		old_count = bo->pin_count;
> +		for (i = 0; i < old_count; i++)
> +			amdgpu_bo_unpin(bo);
> +		for (i = 0; i < old_count; i++) {
> +			ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
> +			if (unlikely(ret != 0))
> +				break;
> +		}
> +	}
> +	if (ret == 0)
> +		ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
> +
> +	amdgpu_bo_unreserve(bo);
> +	return ret;
> +}
> +
> +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
> +{
> +	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
> +	int ret = 0;
> +	bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE);
> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
> +
> +	if (!reads || !gtt_scannable)
> +		return 0;
> +
> +	mb();
> +	ret = amdgpu_bo_reserve(bo, true);
> +	if (unlikely(ret != 0))
> +		return ret;
> +
> +	amdgpu_bo_unpin(bo);
> +
> +	amdgpu_bo_unreserve(bo);
> +
> +	return 0;
> +}
> +
> +static struct dma_buf_ops amdgpu_dmabuf_ops;
> +static atomic_t aops_lock;
> +
>  struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>  					struct drm_gem_object *gobj,
>  					int flags)
> @@ -178,5 +254,37 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>  	buf = drm_gem_prime_export(dev, gobj, flags);
>  	if (!IS_ERR(buf))
>  		buf->file->f_mapping = dev->anon_inode->i_mapping;
> +
> +	while (amdgpu_dmabuf_ops.begin_cpu_access != amdgpu_gem_begin_cpu_access ||
> +	       amdgpu_dmabuf_ops.end_cpu_access != amdgpu_gem_end_cpu_access)
> +	{
> +		if (!atomic_cmpxchg(&aops_lock, 0, 1)) {
> +			amdgpu_dmabuf_ops = *(buf->ops);
> +			amdgpu_dmabuf_ops.begin_cpu_access = amdgpu_gem_begin_cpu_access;
> +			amdgpu_dmabuf_ops.end_cpu_access = amdgpu_gem_end_cpu_access;
> +		}
> +	}
> +	buf->ops = &amdgpu_dmabuf_ops;
> +
>  	return buf;
>  }
> +
> +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
> +					       struct dma_buf *dma_buf)
> +{
> +	struct drm_gem_object *obj;
> +
> +	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
> +		obj = dma_buf->priv;
> +		if (obj->dev == dev) {
> +			/*
> +			 * Importing dmabuf exported from our own gem increases
> +			 * refcount on gem itself instead of f_count of dmabuf.
> +			 */
> +			drm_gem_object_get(obj);
> +			return obj;
> +		}
> +	}
> +
> +	return drm_gem_prime_import(dev, dma_buf);
> +}
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index a3bf021..f25b830 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -2987,6 +2987,8 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
>  	int r;
>  	struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old;
>  	unsigned int awidth;
> +	struct amdgpu_device *adev = plane->dev->dev_private;
> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
>
>  	dm_plane_state_old = to_dm_plane_state(plane->state);
>  	dm_plane_state_new = to_dm_plane_state(new_state);
> @@ -3005,7 +3007,11 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
>  		return r;
>
>  	r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &afb->address);
> -
> +	if (unlikely(r != 0)) {
> +		/* latest APUs support gtt scan out */
> +		if (gtt_scannable)
> +			r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_GTT, &afb->address);
> +	}
>
>  	amdgpu_bo_unreserve(rbo);
>
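To make the mmap suggestion above concrete, here is a minimal, untested
sketch of what "migrate the BO to GTT during mmap" could look like:
validate the buffer into GTT once when user space maps the dma-buf,
without pinning it, so TTM stays free to move the buffer later and a CPU
page fault simply picks up the new location. The helper name
amdgpu_gem_prime_mmap_migrate is made up for illustration, and
ttm_bo_validate() is assumed to have its current (interruptible,
no_wait_gpu) signature:

/* Hypothetical helper, called from the dma-buf mmap path before the
 * mapping is set up.  Moves the BO to GTT but does NOT pin it, so
 * eviction stays possible. */
static int amdgpu_gem_prime_mmap_migrate(struct amdgpu_bo *bo)
{
	int ret;

	ret = amdgpu_bo_reserve(bo, false);
	if (unlikely(ret != 0))
		return ret;

	/* Ask TTM for GTT placement; the fault handler maps the result. */
	amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
	ret = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);

	amdgpu_bo_unreserve(bo);
	return ret;
}

Unlike the pinning in the patch, a failure here could even be ignored:
the mapping would just keep working on the buffer's current placement.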
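For reference, the begin_cpu_access/end_cpu_access callbacks the patch
overrides are driven from user space through the DMA_BUF_IOCTL_SYNC
ioctl around a CPU mapping of the dma-buf fd. An illustrative userspace
snippet, with error handling mostly trimmed:

#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/dma-buf.h>

/* Bracket CPU reads of a dma-buf with sync ioctls so that the
 * exporter's begin_cpu_access/end_cpu_access callbacks run. */
static int read_dmabuf(int dmabuf_fd, void *dst, size_t size)
{
	struct dma_buf_sync sync = { 0 };
	void *map = mmap(NULL, size, PROT_READ, MAP_SHARED, dmabuf_fd, 0);

	if (map == MAP_FAILED)
		return -1;

	sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ;
	ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);	/* begin_cpu_access */

	memcpy(dst, map, size);

	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ;
	ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);	/* end_cpu_access */

	munmap(map, size);
	return 0;
}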