The TI k3-j721s2 platform has a bug relating to cache snooping on the
AXI ACE-Lite interface. Disabling cache snooping altogether would also
resolve the issue, but would incur a greater performance cost. Given
the platform is dma-coherent, forcing all device-accessible memory
allocations through the CPU cache is the preferred solution.

Implement this workaround so that it can later be enabled for the TI
k3-j721s2 platform.

Signed-off-by: Matt Coster <matt.coster@xxxxxxxxxx>
---
Changes in v2:
- None
- Link to v1: https://lore.kernel.org/r/20241105-sets-bxs-4-64-patch-v1-v1-19-4ed30e865892@xxxxxxxxxx

---
 drivers/gpu/drm/imagination/pvr_device.c | 11 ++++++++++-
 drivers/gpu/drm/imagination/pvr_device.h | 11 +++++++++++
 drivers/gpu/drm/imagination/pvr_drv.c    |  2 +-
 drivers/gpu/drm/imagination/pvr_gem.c    |  3 +++
 drivers/gpu/drm/imagination/pvr_gem.h    |  7 +++++--
 drivers/gpu/drm/imagination/pvr_mmu.c    |  7 ++++++-
 6 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c
index 2ce46b9a8ab7609faebeeb4e7820751b00047806..ffc177c383c1be16061eff0290c347918b0991f7 100644
--- a/drivers/gpu/drm/imagination/pvr_device.c
+++ b/drivers/gpu/drm/imagination/pvr_device.c
@@ -24,6 +24,7 @@
 #include <linux/gfp.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/stddef.h>
@@ -635,6 +636,7 @@ bool
 pvr_device_overrides_validate(struct pvr_device *pvr_dev,
 			      const struct pvr_device_overrides *overrides)
 {
+	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
 	bool ret = true;
 
 	/*
@@ -643,7 +645,14 @@ pvr_device_overrides_validate(struct pvr_device *pvr_dev,
 	 *
	 * Note that this function may be called early during device initialization
	 * so it should not be assumed that @pvr_dev is ready for normal use yet.
-	 */
+	 */
+
+	if (overrides->device_memory_force_cpu_cached &&
+	    device_get_dma_attr(drm_dev->dev) != DEV_DMA_COHERENT) {
+		drm_err(drm_dev,
+			"Specifying device_memory_force_cpu_cached override without dma-coherent attribute is unsupported.");
+		ret = false;
+	}
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h
index ad0a02a37154099542247dfc62f411c10f4e41f4..7ae14899db24f4c747e8cf4d61d252eb403713f4 100644
--- a/drivers/gpu/drm/imagination/pvr_device.h
+++ b/drivers/gpu/drm/imagination/pvr_device.h
@@ -60,8 +60,19 @@ struct pvr_fw_version {
 /**
  * struct pvr_device_overrides - Hardware-level overrides loaded from
  * MODULE_DEVICE_TABLE() or similar.
+ *
+ * @device_memory_force_cpu_cached: By default, all device memory buffer objects
+ * are mapped write-combined on the CPU (see %PVR_BO_CPU_CACHED) including MMU
+ * page table backing pages which do not use the regular device memory objects.
+ * This override forces all CPU mappings to be mapped cached instead. Since this
+ * could require additional cache maintenance operations to be performed,
+ * pvr_device_overrides_validate() ensures that the dma-coherent attribute is
+ * set when this override is specified. Required on some TI platforms where a
+ * bug causes device-to-CPU cache snooping to behave incorrectly when
+ * interacting with CPU-uncached memory.
  */
 struct pvr_device_overrides {
+	bool device_memory_force_cpu_cached;
 };
 
 /**
diff --git a/drivers/gpu/drm/imagination/pvr_drv.c b/drivers/gpu/drm/imagination/pvr_drv.c
index b56ee2cda9b54c4388a6eef38b0ff81acdb05874..e074cfb0d2055b5387dbb142ca972108977f9854 100644
--- a/drivers/gpu/drm/imagination/pvr_drv.c
+++ b/drivers/gpu/drm/imagination/pvr_drv.c
@@ -1490,7 +1490,7 @@ static void pvr_remove(struct platform_device *plat_dev)
 	pvr_power_domains_fini(pvr_dev);
 }
 
-static const struct pvr_device_overrides pvr_device_overrides_default = {};
+static const struct pvr_device_overrides pvr_device_overrides_default = { 0 };
 
 /*
  * Always specify &pvr_device_overrides_default instead of %NULL for &struct of_device_id->data so
diff --git a/drivers/gpu/drm/imagination/pvr_gem.c b/drivers/gpu/drm/imagination/pvr_gem.c
index 6a8c81fe8c1e85c2130a4fe90fce35b6a2be35aa..c67c30518f89af3de2e617a9b65e5cd78870fa2c 100644
--- a/drivers/gpu/drm/imagination/pvr_gem.c
+++ b/drivers/gpu/drm/imagination/pvr_gem.c
@@ -345,6 +345,9 @@ pvr_gem_object_create(struct pvr_device *pvr_dev, size_t size, u64 flags)
 	if (size == 0 || !pvr_gem_object_flags_validate(flags))
 		return ERR_PTR(-EINVAL);
 
+	if (PVR_HAS_OVERRIDE(pvr_dev, device_memory_force_cpu_cached))
+		flags |= PVR_BO_CPU_CACHED;
+
 	shmem_obj = drm_gem_shmem_create(from_pvr_device(pvr_dev), size);
 	if (IS_ERR(shmem_obj))
 		return ERR_CAST(shmem_obj);
diff --git a/drivers/gpu/drm/imagination/pvr_gem.h b/drivers/gpu/drm/imagination/pvr_gem.h
index e0e5ea509a2e88a437b8d241ea13c7bab2220f56..9b3cbcbe48dfbbc8be211a8a409699a43452e178 100644
--- a/drivers/gpu/drm/imagination/pvr_gem.h
+++ b/drivers/gpu/drm/imagination/pvr_gem.h
@@ -44,8 +44,11 @@ struct pvr_file;
  * Bits not defined anywhere are "undefined".
  *
  * CPU mapping options
- * :PVR_BO_CPU_CACHED: By default, all GEM objects are mapped write-combined on the CPU. Set this
- *    flag to override this behaviour and map the object cached.
+ * :PVR_BO_CPU_CACHED: By default, all GEM objects are mapped write-combined on the CPU. Set
+ *    this flag to override this behaviour and map the object cached. If
+ *    &struct pvr_device_overrides->device_memory_force_cpu_cached is specified, all allocations
+ *    will be mapped as if this flag was set. This does not require any additional consideration
+ *    at allocation time since the override is only valid if the dma-coherent attribute is set.
  *
  * Firmware options
  * :PVR_BO_FW_NO_CLEAR_ON_RESET: By default, all FW objects are cleared and reinitialised on hard
diff --git a/drivers/gpu/drm/imagination/pvr_mmu.c b/drivers/gpu/drm/imagination/pvr_mmu.c
index 4fe70610ed94cf707e631f8148af081a94f97327..7c7deb29b735308eaed26900f2f54a838382c255 100644
--- a/drivers/gpu/drm/imagination/pvr_mmu.c
+++ b/drivers/gpu/drm/imagination/pvr_mmu.c
@@ -259,6 +259,7 @@ pvr_mmu_backing_page_init(struct pvr_mmu_backing_page *page,
 	struct device *dev = from_pvr_device(pvr_dev)->dev;
 
 	struct page *raw_page;
+	pgprot_t prot;
 	int err;
 
 	dma_addr_t dma_addr;
@@ -268,7 +269,11 @@ pvr_mmu_backing_page_init(struct pvr_mmu_backing_page *page,
 	if (!raw_page)
 		return -ENOMEM;
 
-	host_ptr = vmap(&raw_page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+	prot = PAGE_KERNEL;
+	if (!PVR_HAS_OVERRIDE(pvr_dev, device_memory_force_cpu_cached))
+		prot = pgprot_writecombine(prot);
+
+	host_ptr = vmap(&raw_page, 1, VM_MAP, prot);
 	if (!host_ptr) {
 		err = -ENOMEM;
 		goto err_free_page;

-- 
2.47.0
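
For reviewers, a rough sketch of how a follow-up change might enable this
override for the affected platform through the &struct of_device_id->data
mechanism already used for &pvr_device_overrides_default. The compatible
string, the table name and the pvr_device_overrides_ti_k3_j721s2 symbol
below are illustrative assumptions only and are not part of this series:

/*
 * Illustrative only: a hypothetical per-platform override set that a later
 * patch could point the TI k3-j721s2 match table entry at. Existing entries
 * would keep pointing at &pvr_device_overrides_default.
 */
static const struct pvr_device_overrides pvr_device_overrides_ti_k3_j721s2 = {
	/* Force CPU-cached mappings to work around the ACE-Lite snooping bug. */
	.device_memory_force_cpu_cached = true,
};

static const struct of_device_id dt_match[] = {
	/* Placeholder compatible string for the affected platform. */
	{ .compatible = "ti,j721s2-gpu", .data = &pvr_device_overrides_ti_k3_j721s2 },
	{}
};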