> If that is what this callback should do then this implementation would be
> incorrect. Pinning doesn't wait for any GPU operation to finish.

During pinning, it will wait for all the fences to finish. That should be OK.

Sam

> -----Original Message-----
> From: Koenig, Christian
> Sent: Wednesday, September 20, 2017 12:21 PM
> To: Li, Samuel <Samuel.Li at amd.com>; amd-gfx at lists.freedesktop.org
> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
>
> Am 20.09.2017 um 17:44 schrieb Li, Samuel:
> >> What happens when another thread is using amdgpu_dmabuf_ops to call
> >> begin_cpu_access/end_cpu_access when you are fixing it up again?
> > Right, that is an issue. A simple "if (!amdgpu_dmabuf_ops.begin_cpu_access)"
> > should be able to deal with that.
> >
> >> I would just completely drop the two callbacks, pinning is not
> >> necessary for CPU access and, thinking more about it, it actually has
> >> some unwanted side effects.
> > CPU access needs synchronization anyway, so the two callbacks cannot be
> > dropped (other drivers implement them too), so I would like to keep them
> > there for now.
>
> Wait a second, what do you mean with "CPU access needs synchronization"?
>
> At least i915 makes the memory GPU-inaccessible when you start to use it
> with the CPU.
>
> If that is what this callback should do then this implementation would be
> incorrect. Pinning doesn't wait for any GPU operation to finish.
>
> Regards,
> Christian.
>
> >
> > Sam
> >
> >
> >> -----Original Message-----
> >> From: Koenig, Christian
> >> Sent: Wednesday, September 20, 2017 2:58 AM
> >> To: Li, Samuel <Samuel.Li at amd.com>; amd-gfx at lists.freedesktop.org
> >> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
> >>
> >>> What do you mean "This isn't race free"?
> >> Take a look at the code again:
> >>> + dma_buf = drm_gem_prime_export(dev, gobj, flags);
> >>> + amdgpu_dmabuf_ops = *(dma_buf->ops);
> >>> + amdgpu_dmabuf_ops.begin_cpu_access = amdgpu_gem_begin_cpu_access;
> >>> + amdgpu_dmabuf_ops.end_cpu_access = amdgpu_gem_end_cpu_access;
> >>> + dma_buf->ops = &amdgpu_dmabuf_ops;
> >> What happens when another thread is using amdgpu_dmabuf_ops to call
> >> begin_cpu_access/end_cpu_access when you are fixing it up again?
> >>
> >> I would just completely drop the two callbacks, pinning is not
> >> necessary for CPU access and, thinking more about it, it actually has
> >> some unwanted side effects.
> >>
> >> Regards,
> >> Christian.
> >>
> >> Am 19.09.2017 um 23:22 schrieb Samuel Li:
> >>>>> + vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
> >>>> Maybe better use "vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;",
> >>>> but I'm not sure. How do other drivers handle this?
> >>> This is a good catch. Looks like pgoff is honored during prime mmap,
> >>> not a fake offset here.
> >>>>> + dma_buf->ops = &amdgpu_dmabuf_ops;
> >>>> This isn't race free and needs to be fixed.
> >>>> Better add callbacks to drm_prime.c similar to drm_gem_dmabuf_mmap().
> >>> What do you mean "This isn't race free"?
> >>>
> >>> Regards,
> >>> Sam
> >>>
> >>>
> >>>
> >>> On 2017-09-15 11:05 AM, Christian König wrote:
> >>>> Am 14.09.2017 um 00:39 schrieb Samuel Li:
> >>>>> v2: drop hdp invalidate/flush.
> >>>>>
> >>>>> Signed-off-by: Samuel Li <Samuel.Li at amd.com>
> >>>>> ---
> >>>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 ++
> >>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +-
> >>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 77 ++++++++++++++++++++++++++++++-
> >>>>> 3 files changed, 81 insertions(+), 2 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>> index d2aaad7..188b705 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>> @@ -395,11 +395,14 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
> >>>>> struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
> >>>>> struct drm_gem_object *gobj,
> >>>>> int flags);
> >>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
> >>>>> + struct dma_buf *dma_buf);
> >>>>> int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
> >>>>> void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
> >>>>> struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
> >>>>> void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
> >>>>> void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
> >>>>> +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
> >>>>> int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
> >>>>> /* sub-allocation manager, it has to be protected by another lock.
> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>> index 2cdf844..9b63ac5 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>> @@ -835,7 +835,7 @@ static struct drm_driver kms_driver = {
> >>>>> .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
> >>>>> .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
> >>>>> .gem_prime_export = amdgpu_gem_prime_export,
> >>>>> - .gem_prime_import = drm_gem_prime_import,
> >>>>> + .gem_prime_import = amdgpu_gem_prime_import,
> >>>>> .gem_prime_pin = amdgpu_gem_prime_pin,
> >>>>> .gem_prime_unpin = amdgpu_gem_prime_unpin,
> >>>>> .gem_prime_res_obj = amdgpu_gem_prime_res_obj,
> >>>>> @@ -843,6 +843,7 @@ static struct drm_driver kms_driver = {
> >>>>> .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
> >>>>> .gem_prime_vmap = amdgpu_gem_prime_vmap,
> >>>>> .gem_prime_vunmap = amdgpu_gem_prime_vunmap,
> >>>>> + .gem_prime_mmap = amdgpu_gem_prime_mmap,
> >>>>> .name = DRIVER_NAME,
> >>>>> .desc = DRIVER_DESC,
> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>> index 5b3f928..13c977a 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>> @@ -57,6 +57,40 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
> >>>>> ttm_bo_kunmap(&bo->dma_buf_vmap);
> >>>>> }
> >>>>> +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
> >>>>> +{
> >>>>> + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> >>>>> + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> >>>>> + unsigned asize = amdgpu_bo_size(bo);
> >>>>> + int ret;
> >>>>> +
> >>>>> + if (!vma->vm_file)
> >>>>> + return -ENODEV;
> >>>>> +
> >>>>> + if (adev == NULL)
> >>>>> + return -ENODEV;
> >>>>> +
> >>>>> + /* Check for valid size. */
> >>>>> + if (asize < vma->vm_end - vma->vm_start)
> >>>>> + return -EINVAL;
> >>>>> +
> >>>>> + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
> >>>>> + (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
> >>>>> + return -EPERM;
> >>>>> + }
> >>>>> + vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
> >>>> Maybe better use "vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;",
> >>>> but I'm not sure. How do other drivers handle this?
> >>>>
> >>>>> +
> >>>>> + /* prime mmap does not need to check access, so allow here */
> >>>>> + ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file->private_data);
> >>>>> + if (ret)
> >>>>> + return ret;
> >>>>> +
> >>>>> + ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
> >>>>> + drm_vma_node_revoke(&obj->vma_node, vma->vm_file->private_data);
> >>>>> +
> >>>>> + return ret;
> >>>>> +}
> >>>>> +
> >>>>> struct drm_gem_object *
> >>>>> amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
> >>>>> struct dma_buf_attachment *attach,
> >>>>> @@ -130,14 +164,55 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
> >>>>> return bo->tbo.resv;
> >>>>> }
> >>>>> +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
> >>>>> +{
> >>>>> + return amdgpu_gem_prime_pin(dma_buf->priv);
> >>>>> +}
> >>>>> +
> >>>>> +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
> >>>>> +{
> >>>>> + amdgpu_gem_prime_unpin(dma_buf->priv);
> >>>>> +
> >>>>> + return 0;
> >>>>> +}
> >>>>> +
> >>>>> +static struct dma_buf_ops amdgpu_dmabuf_ops;
> >>>>> +
> >>>>> struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
> >>>>> struct drm_gem_object *gobj,
> >>>>> int flags)
> >>>>> {
> >>>>> struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
> >>>>> + struct dma_buf *dma_buf;
> >>>>> if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
> >>>>> return ERR_PTR(-EPERM);
> >>>>> - return drm_gem_prime_export(dev, gobj, flags);
> >>>>> + dma_buf = drm_gem_prime_export(dev, gobj, flags);
> >>>>> + amdgpu_dmabuf_ops = *(dma_buf->ops);
> >>>>> + amdgpu_dmabuf_ops.begin_cpu_access = amdgpu_gem_begin_cpu_access;
> >>>>> + amdgpu_dmabuf_ops.end_cpu_access = amdgpu_gem_end_cpu_access;
> >>>>> + dma_buf->ops = &amdgpu_dmabuf_ops;
> >>>> This isn't race free and needs to be fixed.
> >>>>
> >>>> Better add callbacks to drm_prime.c similar to drm_gem_dmabuf_mmap().
> >>>>
> >>>> Alternatively you could just completely drop amdgpu_gem_begin_cpu_access()
> >>>> and amdgpu_gem_end_cpu_access() as well.
> >>>>
> >>>> When the buffer is shared between devices it is pinned anyway, and when
> >>>> it isn't shared ttm_bo_mmap() is able to handle VRAM mappings as well.
> >>>>
> >>>> Regards,
> >>>> Christian.
> >>>>
> >>>>> +
> >>>>> + return dma_buf;
> >>>>> +}
> >>>>> +
> >>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
> >>>>> + struct dma_buf *dma_buf)
> >>>>> +{
> >>>>> + struct drm_gem_object *obj;
> >>>>> +
> >>>>> + if (dma_buf->ops == &amdgpu_dmabuf_ops) {
> >>>>> + obj = dma_buf->priv;
> >>>>> + if (obj->dev == dev) {
> >>>>> + /*
> >>>>> + * Importing dmabuf exported from our own gem increases
> >>>>> + * refcount on gem itself instead of f_count of dmabuf.
> >>>>> + */
> >>>>> + drm_gem_object_get(obj);
> >>>>> + return obj;
> >>>>> + }
> >>>>> + }
> >>>>> +
> >>>>> + return drm_gem_prime_import(dev, dma_buf);
> >>>>> }
>
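A rough sketch of the one-shot ops fixup guard discussed above, with a mutex added
for illustration (the lock name is invented here; this is untested and not part of
the patch):

/* Illustrative only: copy the exporter's dma_buf_ops exactly once and
 * serialize the copy, so a concurrent export cannot rewrite the struct
 * while another thread is calling through it.
 */
static DEFINE_MUTEX(amdgpu_dmabuf_ops_lock);    /* name invented for this sketch */

struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
                                        struct drm_gem_object *gobj,
                                        int flags)
{
        struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
        struct dma_buf *dma_buf;

        if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
                return ERR_PTR(-EPERM);

        dma_buf = drm_gem_prime_export(dev, gobj, flags);
        if (IS_ERR(dma_buf))
                return dma_buf;

        mutex_lock(&amdgpu_dmabuf_ops_lock);
        if (!amdgpu_dmabuf_ops.begin_cpu_access) {
                /* First export: take over the default ops exactly once. */
                amdgpu_dmabuf_ops = *(dma_buf->ops);
                amdgpu_dmabuf_ops.begin_cpu_access = amdgpu_gem_begin_cpu_access;
                amdgpu_dmabuf_ops.end_cpu_access = amdgpu_gem_end_cpu_access;
        }
        dma_buf->ops = &amdgpu_dmabuf_ops;
        mutex_unlock(&amdgpu_dmabuf_ops_lock);

        return dma_buf;
}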
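On the synchronization question: if begin_cpu_access is expected to wait for
pending GPU work (the i915-style behaviour mentioned above) rather than rely on
pinning alone, an explicit wait on the BO's reservation object would look roughly
like the following untested sketch:

/* Untested sketch: block on all fences attached to the BO's reservation
 * object before allowing CPU access, then pin so the buffer cannot be
 * moved while the CPU is touching it.
 */
static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
                                       enum dma_data_direction direction)
{
        struct drm_gem_object *obj = dma_buf->priv;
        struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
        long r;

        /* Wait for the exclusive fence and all shared fences, interruptibly. */
        r = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, true,
                                                MAX_SCHEDULE_TIMEOUT);
        if (r <= 0)
                return r ? r : -ETIMEDOUT;

        return amdgpu_gem_prime_pin(obj);
}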
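For completeness, this is roughly how the new path gets exercised from user space:
mmap() on the dma-buf fd goes through the dma-buf file's mmap handler and
drm_gem_dmabuf_mmap() into the driver's .gem_prime_mmap, and the DMA_BUF_IOCTL_SYNC
bracket is what ends up in begin/end_cpu_access. The dmabuf_fd and size parameters
are placeholders in this hypothetical example, not from the patch:

#include <stddef.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/dma-buf.h>      /* DMA_BUF_IOCTL_SYNC, struct dma_buf_sync */

/* dmabuf_fd and size come from whoever exported and shared the buffer. */
static int cpu_fill_dmabuf(int dmabuf_fd, size_t size)
{
        struct dma_buf_sync sync = { .flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW };
        void *map;

        map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd, 0);
        if (map == MAP_FAILED)
                return -1;

        ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);    /* -> begin_cpu_access */
        memset(map, 0, size);                           /* CPU write through the mapping */
        sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
        ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);    /* -> end_cpu_access */

        munmap(map, size);
        return 0;
}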