Hi Sui, Le mercredi 07 juin 2023 à 13:30 +0800, Sui Jingfeng a écrit : > The single map_noncoherent member of struct drm_gem_dma_object may > not > sufficient for describing the backing memory of the GEM buffer > object. > > Especially on dma-coherent systems, the backing memory is both cached > coherent for multi-core CPUs and dma-coherent for peripheral device. > Say architectures like X86-64, LoongArch64, Loongson Mips64, etc. > > Whether a peripheral device is dma-coherent or not can be > implementation-dependent. The single map_noncoherent option is not > enough > to reflect real hardware anymore. For example, the Loongson LS3A4000 > CPU > and LS2K2000/LS2K1000 SoC, peripheral device of such hardware > platform > allways snoop CPU's cache. Doing the allocation with > dma_alloc_coherent > function is preferred. The return buffer is cached, it should not > using > the default write-combine mapping. While with the current implement, > there > no way to tell the drm core to reflect this. > > This patch adds cached and coherent members to struct > drm_gem_dma_object. > which allow driver implements to inform the core. Introducing new > mappings > while keeping the original default behavior unchanged. Did you try to simply set the "dma-coherent" property to the device's node? >From what I understand if you add that property then Linux will use DMA coherent memory even though you use dma_alloc_noncoherent() and the sync_single_for_cpu() / sync_single_for_device() are then NOPs. Cheers, -Paul > Signed-off-by: Sui Jingfeng <suijingfeng@xxxxxxxxxxx> > --- > drivers/gpu/drm/drm_fb_dma_helper.c | 11 +++++------ > drivers/gpu/drm/drm_fbdev_dma.c | 2 +- > drivers/gpu/drm/drm_gem_dma_helper.c | 20 ++++++++++++++++---- > drivers/gpu/drm/ingenic/ingenic-drm-drv.c | 5 ++++- > drivers/gpu/drm/rcar-du/Kconfig | 2 -- > drivers/gpu/drm/rcar-du/rcar_du_kms.c | 4 +++- > include/drm/drm_gem_dma_helper.h | 7 +++++-- > 7 files changed, 34 insertions(+), 17 deletions(-) > > diff --git a/drivers/gpu/drm/drm_fb_dma_helper.c > b/drivers/gpu/drm/drm_fb_dma_helper.c > index 3b535ad1b07c..93ff05041192 100644 > --- a/drivers/gpu/drm/drm_fb_dma_helper.c > +++ b/drivers/gpu/drm/drm_fb_dma_helper.c > @@ -106,16 +106,15 @@ dma_addr_t drm_fb_dma_get_gem_addr(struct > drm_framebuffer *fb, > EXPORT_SYMBOL_GPL(drm_fb_dma_get_gem_addr); > > /** > - * drm_fb_dma_sync_non_coherent - Sync GEM object to non-coherent > backing > - * memory > + * drm_fb_dma_sync_non_coherent - Sync GEM object to cached backing > memory > * @drm: DRM device > * @old_state: Old plane state > * @state: New plane state > * > * This function can be used by drivers that use damage clips and > have > - * DMA GEM objects backed by non-coherent memory. Calling this > function > - * in a plane's .atomic_update ensures that all the data in the > backing > - * memory have been written to RAM. > + * DMA GEM objects backed by cached memory. Calling this function in > a > + * plane's .atomic_update ensures that all the data in the backing > memory > + * have been written to RAM. > */ > void drm_fb_dma_sync_non_coherent(struct drm_device *drm, > struct drm_plane_state *old_state, > @@ -131,7 +130,7 @@ void drm_fb_dma_sync_non_coherent(struct > drm_device *drm, > > for (i = 0; i < finfo->num_planes; i++) { > dma_obj = drm_fb_dma_get_gem_obj(state->fb, i); > - if (!dma_obj->map_noncoherent) > + if (dma_obj->cached && dma_obj->coherent) > continue; > > daddr = drm_fb_dma_get_gem_addr(state->fb, state, i); > diff --git a/drivers/gpu/drm/drm_fbdev_dma.c > b/drivers/gpu/drm/drm_fbdev_dma.c > index d86773fa8ab0..49fe9b284cc8 100644 > --- a/drivers/gpu/drm/drm_fbdev_dma.c > +++ b/drivers/gpu/drm/drm_fbdev_dma.c > @@ -131,7 +131,7 @@ static int drm_fbdev_dma_helper_fb_probe(struct > drm_fb_helper *fb_helper, > > /* screen */ > info->flags |= FBINFO_VIRTFB; /* system memory */ > - if (dma_obj->map_noncoherent) > + if (dma_obj->cached) > info->flags |= FBINFO_READS_FAST; /* signal caching > */ > info->screen_size = sizes->surface_height * fb->pitches[0]; > info->screen_buffer = map.vaddr; > diff --git a/drivers/gpu/drm/drm_gem_dma_helper.c > b/drivers/gpu/drm/drm_gem_dma_helper.c > index 870b90b78bc4..dec1d512bdf1 100644 > --- a/drivers/gpu/drm/drm_gem_dma_helper.c > +++ b/drivers/gpu/drm/drm_gem_dma_helper.c > @@ -93,7 +93,11 @@ __drm_gem_dma_create(struct drm_device *drm, > size_t size, bool private) > drm_gem_private_object_init(drm, gem_obj, size); > > /* Always use writecombine for dma-buf mappings */ > - dma_obj->map_noncoherent = false; > + /* FIXME: This is not always true, on some dma > coherent system, > + * cached mappings should be preferred over > writecombine > + */ > + dma_obj->cached = false; > + dma_obj->coherent = false; > } else { > ret = drm_gem_object_init(drm, gem_obj, size); > } > @@ -143,7 +147,11 @@ struct drm_gem_dma_object > *drm_gem_dma_create(struct drm_device *drm, > if (IS_ERR(dma_obj)) > return dma_obj; > > - if (dma_obj->map_noncoherent) { > + if (dma_obj->cached && dma_obj->coherent) { > + dma_obj->vaddr = dma_alloc_coherent(drm->dev, size, > + &dma_obj- > >dma_addr, > + GFP_KERNEL | > __GFP_NOWARN); > + } else if (dma_obj->cached && !dma_obj->coherent) { > dma_obj->vaddr = dma_alloc_noncoherent(drm->dev, > size, > &dma_obj- > >dma_addr, > DMA_TO_DEVICE, > @@ -153,6 +161,7 @@ struct drm_gem_dma_object > *drm_gem_dma_create(struct drm_device *drm, > &dma_obj->dma_addr, > GFP_KERNEL | > __GFP_NOWARN); > } > + > if (!dma_obj->vaddr) { > drm_dbg(drm, "failed to allocate buffer with size > %zu\n", > size); > @@ -233,7 +242,10 @@ void drm_gem_dma_free(struct drm_gem_dma_object > *dma_obj) > dma_buf_vunmap_unlocked(gem_obj- > >import_attach->dmabuf, &map); > drm_prime_gem_destroy(gem_obj, dma_obj->sgt); > } else if (dma_obj->vaddr) { > - if (dma_obj->map_noncoherent) > + if (dma_obj->cached && dma_obj->coherent) > + dma_free_coherent(gem_obj->dev->dev, dma_obj- > >base.size, > + dma_obj->vaddr, dma_obj- > >dma_addr); > + else if (dma_obj->cached && !dma_obj->coherent) > dma_free_noncoherent(gem_obj->dev->dev, > dma_obj->base.size, > dma_obj->vaddr, dma_obj- > >dma_addr, > DMA_TO_DEVICE); > @@ -532,7 +544,7 @@ int drm_gem_dma_mmap(struct drm_gem_dma_object > *dma_obj, struct vm_area_struct * > vma->vm_pgoff -= drm_vma_node_start(&obj->vma_node); > vm_flags_mod(vma, VM_DONTEXPAND, VM_PFNMAP); > > - if (dma_obj->map_noncoherent) { > + if (dma_obj->cached) { > vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); > > ret = dma_mmap_pages(dma_obj->base.dev->dev, > diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c > b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c > index 5ec75e9ba499..a3df2f99a757 100644 > --- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c > +++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c > @@ -919,7 +919,10 @@ ingenic_drm_gem_create_object(struct drm_device > *drm, size_t size) > if (!obj) > return ERR_PTR(-ENOMEM); > > - obj->map_noncoherent = priv->soc_info->map_noncoherent; > + if (priv->soc_info->map_noncoherent) { > + obj->cached = true; > + obj->coherent = false; > + } > > return &obj->base; > } > diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar- > du/Kconfig > index 53c356aed5d5..dddc70c08bdc 100644 > --- a/drivers/gpu/drm/rcar-du/Kconfig > +++ b/drivers/gpu/drm/rcar-du/Kconfig > @@ -2,8 +2,6 @@ > config DRM_RCAR_DU > tristate "DRM Support for R-Car Display Unit" > depends on DRM && OF > - depends on ARM || ARM64 > - depends on ARCH_RENESAS || COMPILE_TEST > select DRM_KMS_HELPER > select DRM_GEM_DMA_HELPER > select VIDEOMODE_HELPERS > diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c > b/drivers/gpu/drm/rcar-du/rcar_du_kms.c > index adfb36b0e815..1142d51473e6 100644 > --- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c > +++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c > @@ -386,7 +386,9 @@ struct drm_gem_object > *rcar_du_gem_prime_import_sg_table(struct drm_device *dev, > gem_obj->funcs = &rcar_du_gem_funcs; > > drm_gem_private_object_init(dev, gem_obj, attach->dmabuf- > >size); > - dma_obj->map_noncoherent = false; > + > + dma_obj->cached = false; > + dma_obj->coherent = false; > > ret = drm_gem_create_mmap_offset(gem_obj); > if (ret) { > diff --git a/include/drm/drm_gem_dma_helper.h > b/include/drm/drm_gem_dma_helper.h > index 8a043235dad8..585ce3d4d1eb 100644 > --- a/include/drm/drm_gem_dma_helper.h > +++ b/include/drm/drm_gem_dma_helper.h > @@ -16,7 +16,9 @@ struct drm_mode_create_dumb; > * more than one entry but they are guaranteed to have > contiguous > * DMA addresses. > * @vaddr: kernel virtual address of the backing memory > - * @map_noncoherent: if true, the GEM object is backed by non- > coherent memory > + * @cached: if true, the GEM object is backed by cached memory > + * @coherent: This option only meaningful when a GEM object is > cached. > + * If true, Sync the GEM object for DMA access is not > required. > */ > struct drm_gem_dma_object { > struct drm_gem_object base; > @@ -26,7 +28,8 @@ struct drm_gem_dma_object { > /* For objects with DMA memory allocated by GEM DMA */ > void *vaddr; > > - bool map_noncoherent; > + bool cached; > + bool coherent; > }; > > #define to_drm_gem_dma_obj(gem_obj) \