RE: [PATCH 8/8] drm/ttm: nuke caching placement flags

"Ruhl, Michael J" <michael.j.ruhl@xxxxxxxxx> · Mon, 5 Oct 2020 16:17:00 +0000

>-----Original Message-----
>From: dri-devel <dri-devel-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of
>Christian König
>Sent: Thursday, October 1, 2020 7:28 AM
>To: dri-devel@xxxxxxxxxxxxxxxxxxxxx; ray.huang@xxxxxxx;
>airlied@xxxxxxxxx; daniel@xxxxxxxx
>Subject: [PATCH 8/8] drm/ttm: nuke caching placement flags
>
>Changing the caching on the fly never really worked
>flawlessly.
>
>So stop this completely and just let drivers specific the
>desired caching in the tt or bus object.
>
>Signed-off-by: Christian König <christian.koenig@xxxxxxx>
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 20 +++-------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 12 ++----
> drivers/gpu/drm/drm_gem_vram_helper.c      |  7 +---
> drivers/gpu/drm/nouveau/nouveau_bo.c       | 36 +++++------------
> drivers/gpu/drm/qxl/qxl_object.c           | 10 ++---
> drivers/gpu/drm/qxl/qxl_ttm.c              |  2 +-
> drivers/gpu/drm/radeon/radeon_object.c     | 46 +++++-----------------
> drivers/gpu/drm/radeon/radeon_ttm.c        | 18 ++-------
> drivers/gpu/drm/ttm/ttm_agp_backend.c      |  2 +-
> drivers/gpu/drm/ttm/ttm_bo.c               | 44 ++-------------------
> drivers/gpu/drm/ttm/ttm_bo_util.c          | 10 ++---
> drivers/gpu/drm/ttm/ttm_tt.c               | 29 --------------
> drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 30 +++++++-------
> include/drm/ttm/ttm_placement.h            | 14 -------
> include/drm/ttm/ttm_tt.h                   | 15 -------
> 15 files changed, 61 insertions(+), 234 deletions(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>index 80bc7177cd45..964f9512dd6e 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>@@ -137,7 +137,7 @@ void amdgpu_bo_placement_from_domain(struct
>amdgpu_bo *abo, u32 domain)
> 		places[c].fpfn = 0;
> 		places[c].lpfn = 0;
> 		places[c].mem_type = TTM_PL_VRAM;
>-		places[c].flags = TTM_PL_FLAG_WC |
>TTM_PL_FLAG_UNCACHED;
>+		places[c].flags = 0;
>
> 		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
> 			places[c].lpfn = visible_pfn;
>@@ -154,11 +154,6 @@ void amdgpu_bo_placement_from_domain(struct
>amdgpu_bo *abo, u32 domain)
> 		places[c].lpfn = 0;
> 		places[c].mem_type = TTM_PL_TT;
> 		places[c].flags = 0;
>-		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
>-			places[c].flags |= TTM_PL_FLAG_WC |
>-				TTM_PL_FLAG_UNCACHED;
>-		else
>-			places[c].flags |= TTM_PL_FLAG_CACHED;
> 		c++;
> 	}
>
>@@ -167,11 +162,6 @@ void amdgpu_bo_placement_from_domain(struct
>amdgpu_bo *abo, u32 domain)
> 		places[c].lpfn = 0;
> 		places[c].mem_type = TTM_PL_SYSTEM;
> 		places[c].flags = 0;
>-		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
>-			places[c].flags |= TTM_PL_FLAG_WC |
>-				TTM_PL_FLAG_UNCACHED;
>-		else
>-			places[c].flags |= TTM_PL_FLAG_CACHED;
> 		c++;
> 	}
>
>@@ -179,7 +169,7 @@ void amdgpu_bo_placement_from_domain(struct
>amdgpu_bo *abo, u32 domain)
> 		places[c].fpfn = 0;
> 		places[c].lpfn = 0;
> 		places[c].mem_type = AMDGPU_PL_GDS;
>-		places[c].flags = TTM_PL_FLAG_UNCACHED;
>+		places[c].flags = 0;
> 		c++;
> 	}
>
>@@ -187,7 +177,7 @@ void amdgpu_bo_placement_from_domain(struct
>amdgpu_bo *abo, u32 domain)
> 		places[c].fpfn = 0;
> 		places[c].lpfn = 0;
> 		places[c].mem_type = AMDGPU_PL_GWS;
>-		places[c].flags = TTM_PL_FLAG_UNCACHED;
>+		places[c].flags = 0;
> 		c++;
> 	}
>
>@@ -195,7 +185,7 @@ void amdgpu_bo_placement_from_domain(struct
>amdgpu_bo *abo, u32 domain)
> 		places[c].fpfn = 0;
> 		places[c].lpfn = 0;
> 		places[c].mem_type = AMDGPU_PL_OA;
>-		places[c].flags = TTM_PL_FLAG_UNCACHED;
>+		places[c].flags = 0;
> 		c++;
> 	}
>
>@@ -203,7 +193,7 @@ void amdgpu_bo_placement_from_domain(struct
>amdgpu_bo *abo, u32 domain)
> 		places[c].fpfn = 0;
> 		places[c].lpfn = 0;
> 		places[c].mem_type = TTM_PL_SYSTEM;
>-		places[c].flags = TTM_PL_MASK_CACHING;
>+		places[c].flags = 0;
> 		c++;
> 	}
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>index 5b56a66063fd..8cdec58b9106 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>@@ -92,7 +92,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object
>*bo,
> 		.fpfn = 0,
> 		.lpfn = 0,
> 		.mem_type = TTM_PL_SYSTEM,
>-		.flags = TTM_PL_MASK_CACHING
>+		.flags = 0
> 	};
>
> 	/* Don't handle scatter gather BOs */
>@@ -538,19 +538,13 @@ static int amdgpu_move_vram_ram(struct
>ttm_buffer_object *bo, bool evict,
> 	placements.fpfn = 0;
> 	placements.lpfn = 0;
> 	placements.mem_type = TTM_PL_TT;
>-	placements.flags = TTM_PL_MASK_CACHING;
>+	placements.flags = 0;
> 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
> 	if (unlikely(r)) {
> 		pr_err("Failed to find GTT space for blit from VRAM\n");
> 		return r;
> 	}
>
>-	/* set caching flags */
>-	r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
>-	if (unlikely(r)) {
>-		goto out_cleanup;
>-	}
>-
> 	r = ttm_tt_populate(bo->bdev, bo->ttm, ctx);
> 	if (unlikely(r))
> 		goto out_cleanup;
>@@ -599,7 +593,7 @@ static int amdgpu_move_ram_vram(struct
>ttm_buffer_object *bo, bool evict,
> 	placements.fpfn = 0;
> 	placements.lpfn = 0;
> 	placements.mem_type = TTM_PL_TT;
>-	placements.flags = TTM_PL_MASK_CACHING;
>+	placements.flags = 0;
> 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
> 	if (unlikely(r)) {
> 		pr_err("Failed to find GTT space for blit to VRAM\n");
>diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c
>b/drivers/gpu/drm/drm_gem_vram_helper.c
>index 79151b1c157c..b9c0ea720efd 100644
>--- a/drivers/gpu/drm/drm_gem_vram_helper.c
>+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
>@@ -147,15 +147,12 @@ static void drm_gem_vram_placement(struct
>drm_gem_vram_object *gbo,
>
> 	if (pl_flag & DRM_GEM_VRAM_PL_FLAG_VRAM) {
> 		gbo->placements[c].mem_type = TTM_PL_VRAM;
>-		gbo->placements[c++].flags = TTM_PL_FLAG_WC |
>-					     TTM_PL_FLAG_UNCACHED |
>-					     invariant_flags;
>+		gbo->placements[c++].flags = invariant_flags;
> 	}
>
> 	if (pl_flag & DRM_GEM_VRAM_PL_FLAG_SYSTEM || !c) {
> 		gbo->placements[c].mem_type = TTM_PL_SYSTEM;
>-		gbo->placements[c++].flags = TTM_PL_MASK_CACHING |
>-					     invariant_flags;
>+		gbo->placements[c++].flags = invariant_flags;
> 	}
>
> 	gbo->placement.num_placement = c;
>diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c
>b/drivers/gpu/drm/nouveau/nouveau_bo.c
>index 8ed30f471ec7..b37dfd12c7b9 100644
>--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
>+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
>@@ -343,37 +343,23 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size,
>int align,
> }
>
> static void
>-set_placement_list(struct nouveau_drm *drm, struct ttm_place *pl,
>unsigned *n,
>-		   uint32_t domain, uint32_t flags)
>+set_placement_list(struct ttm_place *pl, unsigned *n, uint32_t domain)
> {
> 	*n = 0;
>
> 	if (domain & NOUVEAU_GEM_DOMAIN_VRAM) {
>-		struct nvif_mmu *mmu = &drm->client.mmu;
>-		const u8 type = mmu->type[drm->ttm.type_vram].type;
>-
> 		pl[*n].mem_type = TTM_PL_VRAM;
>-		pl[*n].flags = flags & ~TTM_PL_FLAG_CACHED;
>-
>-		/* Some BARs do not support being ioremapped WC */
>-		if (drm->client.device.info.family >=
>NV_DEVICE_INFO_V0_TESLA &&
>-		    type & NVIF_MEM_UNCACHED)
>-			pl[*n].flags &= ~TTM_PL_FLAG_WC;
>-
>+		pl[*n].flags = 0;
> 		(*n)++;
> 	}
> 	if (domain & NOUVEAU_GEM_DOMAIN_GART) {
> 		pl[*n].mem_type = TTM_PL_TT;
>-		pl[*n].flags = flags;
>-
>-		if (drm->agp.bridge)
>-			pl[*n].flags &= ~TTM_PL_FLAG_CACHED;
>-
>+		pl[*n].flags = 0;
> 		(*n)++;
> 	}
> 	if (domain & NOUVEAU_GEM_DOMAIN_CPU) {
> 		pl[*n].mem_type = TTM_PL_SYSTEM;
>-		pl[(*n)++].flags = flags;
>+		pl[(*n)++].flags = 0;
> 	}
> }
>
>@@ -415,18 +401,14 @@ void
> nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t domain,
> 			 uint32_t busy)
> {
>-	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
> 	struct ttm_placement *pl = &nvbo->placement;
>-	uint32_t flags = nvbo->force_coherent ? TTM_PL_FLAG_UNCACHED :
>-						TTM_PL_MASK_CACHING;
>
> 	pl->placement = nvbo->placements;
>-	set_placement_list(drm, nvbo->placements, &pl->num_placement,
>-			   domain, flags);
>+	set_placement_list(nvbo->placements, &pl->num_placement,
>domain);
>
> 	pl->busy_placement = nvbo->busy_placements;
>-	set_placement_list(drm, nvbo->busy_placements, &pl-
>>num_busy_placement,
>-			   domain | busy, flags);
>+	set_placement_list(nvbo->busy_placements, &pl-
>>num_busy_placement,
>+			   domain | busy);
>
> 	set_placement_range(nvbo, domain);
> }
>@@ -888,7 +870,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object
>*bo, bool evict,
> 		.fpfn = 0,
> 		.lpfn = 0,
> 		.mem_type = TTM_PL_TT,
>-		.flags = TTM_PL_MASK_CACHING
>+		.flags = 0
> 	};
> 	struct ttm_placement placement;
> 	struct ttm_resource tmp_reg;
>@@ -930,7 +912,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object
>*bo, bool evict,
> 		.fpfn = 0,
> 		.lpfn = 0,
> 		.mem_type = TTM_PL_TT,
>-		.flags = TTM_PL_MASK_CACHING
>+		.flags = 0
> 	};
> 	struct ttm_placement placement;
> 	struct ttm_resource tmp_reg;
>diff --git a/drivers/gpu/drm/qxl/qxl_object.c
>b/drivers/gpu/drm/qxl/qxl_object.c
>index c8b67e7a3f02..5ba8aac00f5c 100644
>--- a/drivers/gpu/drm/qxl/qxl_object.c
>+++ b/drivers/gpu/drm/qxl/qxl_object.c
>@@ -64,21 +64,21 @@ void qxl_ttm_placement_from_domain(struct qxl_bo
>*qbo, u32 domain)
> 	qbo->placement.busy_placement = qbo->placements;
> 	if (domain == QXL_GEM_DOMAIN_VRAM) {
> 		qbo->placements[c].mem_type = TTM_PL_VRAM;
>-		qbo->placements[c++].flags = TTM_PL_FLAG_CACHED | pflag;
>+		qbo->placements[c++].flags = pflag;
> 	}
> 	if (domain == QXL_GEM_DOMAIN_SURFACE) {
> 		qbo->placements[c].mem_type = TTM_PL_PRIV;
>-		qbo->placements[c++].flags = TTM_PL_FLAG_CACHED | pflag;
>+		qbo->placements[c++].flags = pflag;
> 		qbo->placements[c].mem_type = TTM_PL_VRAM;
>-		qbo->placements[c++].flags = TTM_PL_FLAG_CACHED | pflag;
>+		qbo->placements[c++].flags = pflag;
> 	}
> 	if (domain == QXL_GEM_DOMAIN_CPU) {
> 		qbo->placements[c].mem_type = TTM_PL_SYSTEM;
>-		qbo->placements[c++].flags = TTM_PL_MASK_CACHING |
>pflag;
>+		qbo->placements[c++].flags = pflag;
> 	}
> 	if (!c) {
> 		qbo->placements[c].mem_type = TTM_PL_SYSTEM;
>-		qbo->placements[c++].flags = TTM_PL_MASK_CACHING;
>+		qbo->placements[c++].flags = 0;
> 	}
> 	qbo->placement.num_placement = c;
> 	qbo->placement.num_busy_placement = c;
>diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
>index e79d4df99790..d535e836be72 100644
>--- a/drivers/gpu/drm/qxl/qxl_ttm.c
>+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
>@@ -56,7 +56,7 @@ static void qxl_evict_flags(struct ttm_buffer_object *bo,
> 		.fpfn = 0,
> 		.lpfn = 0,
> 		.mem_type = TTM_PL_SYSTEM,
>-		.flags = TTM_PL_MASK_CACHING
>+		.flags = 0
> 	};
>
> 	if (!qxl_ttm_bo_is_qxl_bo(bo)) {
>diff --git a/drivers/gpu/drm/radeon/radeon_object.c
>b/drivers/gpu/drm/radeon/radeon_object.c
>index 8c285eb118f9..a0b2c4541a2a 100644
>--- a/drivers/gpu/drm/radeon/radeon_object.c
>+++ b/drivers/gpu/drm/radeon/radeon_object.c
>@@ -113,57 +113,29 @@ void radeon_ttm_placement_from_domain(struct
>radeon_bo *rbo, u32 domain)
> 			rbo->placements[c].fpfn =
> 				rbo->rdev->mc.visible_vram_size >>
>PAGE_SHIFT;
> 			rbo->placements[c].mem_type = TTM_PL_VRAM;
>-			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
>-						     TTM_PL_FLAG_UNCACHED;
>+			rbo->placements[c++].flags = 0;
> 		}
>
> 		rbo->placements[c].fpfn = 0;
> 		rbo->placements[c].mem_type = TTM_PL_VRAM;
>-		rbo->placements[c++].flags = TTM_PL_FLAG_WC |
>-					     TTM_PL_FLAG_UNCACHED;
>+		rbo->placements[c++].flags = 0;
> 	}
>
> 	if (domain & RADEON_GEM_DOMAIN_GTT) {
>-		if (rbo->flags & RADEON_GEM_GTT_UC) {
>-			rbo->placements[c].fpfn = 0;
>-			rbo->placements[c].mem_type = TTM_PL_TT;
>-			rbo->placements[c++].flags =
>TTM_PL_FLAG_UNCACHED;
>-
>-		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
>-			   (rbo->rdev->flags & RADEON_IS_AGP)) {
>-			rbo->placements[c].fpfn = 0;
>-			rbo->placements[c].mem_type = TTM_PL_TT;
>-			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
>-				TTM_PL_FLAG_UNCACHED;
>-		} else {
>-			rbo->placements[c].fpfn = 0;
>-			rbo->placements[c].mem_type = TTM_PL_TT;
>-			rbo->placements[c++].flags =
>TTM_PL_FLAG_CACHED;
>-		}
>+		rbo->placements[c].fpfn = 0;
>+		rbo->placements[c].mem_type = TTM_PL_TT;
>+		rbo->placements[c++].flags = 0;
> 	}
>
> 	if (domain & RADEON_GEM_DOMAIN_CPU) {
>-		if (rbo->flags & RADEON_GEM_GTT_UC) {
>-			rbo->placements[c].fpfn = 0;
>-			rbo->placements[c].mem_type = TTM_PL_SYSTEM;
>-			rbo->placements[c++].flags =
>TTM_PL_FLAG_UNCACHED;
>-
>-		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
>-		    rbo->rdev->flags & RADEON_IS_AGP) {
>-			rbo->placements[c].fpfn = 0;
>-			rbo->placements[c].mem_type = TTM_PL_SYSTEM;
>-			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
>-				TTM_PL_FLAG_UNCACHED;
>-		} else {
>-			rbo->placements[c].fpfn = 0;
>-			rbo->placements[c].mem_type = TTM_PL_SYSTEM;
>-			rbo->placements[c++].flags =
>TTM_PL_FLAG_CACHED;
>-		}
>+		rbo->placements[c].fpfn = 0;
>+		rbo->placements[c].mem_type = TTM_PL_SYSTEM;
>+		rbo->placements[c++].flags = 0;
> 	}
> 	if (!c) {
> 		rbo->placements[c].fpfn = 0;
> 		rbo->placements[c].mem_type = TTM_PL_SYSTEM;
>-		rbo->placements[c++].flags = TTM_PL_MASK_CACHING;
>+		rbo->placements[c++].flags = 0;
> 	}
>
> 	rbo->placement.num_placement = c;
>diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c
>b/drivers/gpu/drm/radeon/radeon_ttm.c
>index 9b53a1d80632..d6f42fbc81f4 100644
>--- a/drivers/gpu/drm/radeon/radeon_ttm.c
>+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
>@@ -89,7 +89,7 @@ static void radeon_evict_flags(struct ttm_buffer_object
>*bo,
> 		.fpfn = 0,
> 		.lpfn = 0,
> 		.mem_type = TTM_PL_SYSTEM,
>-		.flags = TTM_PL_MASK_CACHING
>+		.flags = 0
> 	};
>
> 	struct radeon_bo *rbo;
>@@ -225,17 +225,12 @@ static int radeon_move_vram_ram(struct
>ttm_buffer_object *bo,
> 	placements.fpfn = 0;
> 	placements.lpfn = 0;
> 	placements.mem_type = TTM_PL_TT;
>-	placements.flags = TTM_PL_MASK_CACHING;
>+	placements.flags = 0;
> 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
> 	if (unlikely(r)) {
> 		return r;
> 	}
>
>-	r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
>-	if (unlikely(r)) {
>-		goto out_cleanup;
>-	}
>-
> 	r = ttm_tt_populate(bo->bdev, bo->ttm, ctx);
> 	if (unlikely(r)) {
> 		goto out_cleanup;
>@@ -275,7 +270,7 @@ static int radeon_move_ram_vram(struct
>ttm_buffer_object *bo,
> 	placements.fpfn = 0;
> 	placements.lpfn = 0;
> 	placements.mem_type = TTM_PL_TT;
>-	placements.flags = TTM_PL_MASK_CACHING;
>+	placements.flags = 0;
> 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
> 	if (unlikely(r)) {
> 		return r;
>@@ -389,12 +384,7 @@ static int radeon_ttm_io_mem_reserve(struct
>ttm_bo_device *bdev, struct ttm_reso
> 		 * Alpha: use bus.addr to hold the ioremap() return,
> 		 * so we can modify bus.base below.
> 		 */
>-		if (mem->placement & TTM_PL_FLAG_WC)
>-			mem->bus.addr =
>-				ioremap_wc(mem->bus.offset, bus_size);
>-		else
>-			mem->bus.addr =
>-				ioremap(mem->bus.offset, bus_size);
>+		mem->bus.addr = ioremap_wc(mem->bus.offset, bus_size);

Why can you eliminate the ioremap() here?

does this need to be:

if mem->bus.caching instead?

> 		if (!mem->bus.addr)
> 			return -ENOMEM;
>
>diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c
>b/drivers/gpu/drm/ttm/ttm_agp_backend.c
>index a723062d37e7..4f76c9287159 100644
>--- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
>+++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
>@@ -54,7 +54,7 @@ int ttm_agp_bind(struct ttm_tt *ttm, struct
>ttm_resource *bo_mem)
> 	struct page *dummy_read_page = ttm_bo_glob.dummy_read_page;
> 	struct drm_mm_node *node = bo_mem->mm_node;
> 	struct agp_memory *mem;
>-	int ret, cached = (bo_mem->placement & TTM_PL_FLAG_CACHED);
>+	int ret, cached = ttm->caching == ttm_cached;
> 	unsigned i;
>
> 	if (agp_be->mem)
>diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>index e11e8eaa6602..2fe4cbefde28 100644
>--- a/drivers/gpu/drm/ttm/ttm_bo.c
>+++ b/drivers/gpu/drm/ttm/ttm_bo.c
>@@ -252,10 +252,6 @@ static int ttm_bo_handle_move_mem(struct
>ttm_buffer_object *bo,
> 		if (ret)
> 			goto out_err;
>
>-		ret = ttm_tt_set_placement_caching(bo->ttm, mem-
>>placement);
>-		if (ret)
>-			goto out_err;
>-
> 		if (mem->mem_type != TTM_PL_SYSTEM) {
> 			ret = ttm_tt_populate(bdev, bo->ttm, ctx);
> 			if (ret)
>@@ -854,29 +850,6 @@ static int ttm_bo_mem_force_space(struct
>ttm_buffer_object *bo,
> 	return ttm_bo_add_move_fence(bo, man, mem, ctx->no_wait_gpu);
> }
>
>-static uint32_t ttm_bo_select_caching(struct ttm_resource_manager *man,
>-				      uint32_t cur_placement,
>-				      uint32_t proposed_placement)
>-{
>-	uint32_t caching = proposed_placement & TTM_PL_MASK_CACHING;
>-	uint32_t result = proposed_placement & ~TTM_PL_MASK_CACHING;
>-
>-	/**
>-	 * Keep current caching if possible.
>-	 */
>-
>-	if ((cur_placement & caching) != 0)
>-		result |= (cur_placement & caching);
>-	else if ((TTM_PL_FLAG_CACHED & caching) != 0)
>-		result |= TTM_PL_FLAG_CACHED;
>-	else if ((TTM_PL_FLAG_WC & caching) != 0)
>-		result |= TTM_PL_FLAG_WC;
>-	else if ((TTM_PL_FLAG_UNCACHED & caching) != 0)
>-		result |= TTM_PL_FLAG_UNCACHED;
>-
>-	return result;
>-}
>-
> /**
>  * ttm_bo_mem_placement - check if placement is compatible
>  * @bo: BO to find memory for
>@@ -895,18 +868,13 @@ static int ttm_bo_mem_placement(struct
>ttm_buffer_object *bo,
> {
> 	struct ttm_bo_device *bdev = bo->bdev;
> 	struct ttm_resource_manager *man;
>-	uint32_t cur_flags = 0;
>
> 	man = ttm_manager_type(bdev, place->mem_type);
> 	if (!man || !ttm_resource_manager_used(man))
> 		return -EBUSY;
>
>-	cur_flags = ttm_bo_select_caching(man, bo->mem.placement,
>-					  place->flags);
>-	cur_flags |= place->flags & ~TTM_PL_MASK_CACHING;
>-
> 	mem->mem_type = place->mem_type;
>-	mem->placement = cur_flags;
>+	mem->placement = place->flags;
>
> 	spin_lock(&ttm_bo_glob.lru_lock);
> 	ttm_bo_del_from_lru(bo);
>@@ -1039,8 +1007,7 @@ static bool ttm_bo_places_compat(const struct
>ttm_place *places,
> 			continue;
>
> 		*new_flags = heap->flags;
>-		if ((*new_flags & mem->placement &
>TTM_PL_MASK_CACHING) &&
>-		    (mem->mem_type == heap->mem_type) &&
>+		if ((mem->mem_type == heap->mem_type) &&
> 		    (!(*new_flags & TTM_PL_FLAG_CONTIGUOUS) ||
> 		     (mem->placement & TTM_PL_FLAG_CONTIGUOUS)))
> 			return true;
>@@ -1094,9 +1061,6 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
> 		ret = ttm_bo_move_buffer(bo, placement, ctx);
> 		if (ret)
> 			return ret;
>-	} else {
>-		bo->mem.placement &= TTM_PL_MASK_CACHING;
>-		bo->mem.placement |= new_flags &
>~TTM_PL_MASK_CACHING;
> 	}
> 	/*
> 	 * We might need to add a TTM.
>@@ -1164,7 +1128,7 @@ int ttm_bo_init_reserved(struct ttm_bo_device
>*bdev,
> 	bo->mem.bus.offset = 0;
> 	bo->mem.bus.addr = NULL;
> 	bo->moving = NULL;
>-	bo->mem.placement = TTM_PL_FLAG_CACHED;
>+	bo->mem.placement = 0;
> 	bo->acc_size = acc_size;
> 	bo->pin_count = 0;
> 	bo->sg = sg;
>@@ -1512,7 +1476,7 @@ int ttm_bo_swapout(struct ttm_bo_global *glob,
>struct ttm_operation_ctx *ctx)
>
> 		evict_mem = bo->mem;
> 		evict_mem.mm_node = NULL;
>-		evict_mem.placement = TTM_PL_MASK_CACHING;
>+		evict_mem.placement = 0;
> 		evict_mem.mem_type = TTM_PL_SYSTEM;
>
> 		ret = ttm_bo_handle_move_mem(bo, &evict_mem, true,
>&ctx);
>diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c
>b/drivers/gpu/drm/ttm/ttm_bo_util.c
>index 0542097dc419..ba7ab5ed85d0 100644
>--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
>+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
>@@ -72,10 +72,6 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
> 		old_mem->mem_type = TTM_PL_SYSTEM;
> 	}
>
>-	ret = ttm_tt_set_placement_caching(ttm, new_mem->placement);
>-	if (unlikely(ret != 0))
>-		return ret;
>-
> 	if (new_mem->mem_type != TTM_PL_SYSTEM) {
>
> 		ret = ttm_tt_populate(bo->bdev, ttm, ctx);
>@@ -135,7 +131,7 @@ static int ttm_resource_ioremap(struct ttm_bo_device
>*bdev,
> 	} else {
> 		size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT;
>
>-		if (mem->placement & TTM_PL_FLAG_WC)
>+		if (mem->bus.caching == ttm_write_combined)
> 			addr = ioremap_wc(mem->bus.offset, bus_size);
> 		else
> 			addr = ioremap(mem->bus.offset, bus_size);
>@@ -427,7 +423,7 @@ static int ttm_bo_ioremap(struct ttm_buffer_object
>*bo,
> 		map->virtual = (void *)(((u8 *)bo->mem.bus.addr) + offset);
> 	} else {
> 		map->bo_kmap_type = ttm_bo_map_iomap;
>-		if (mem->placement & TTM_PL_FLAG_WC)
>+		if (mem->bus.caching == ttm_write_combined)
> 			map->virtual = ioremap_wc(bo->mem.bus.offset +
>offset,
> 						  size);
> 		else
>@@ -457,7 +453,7 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object
>*bo,
> 	if (ret)
> 		return ret;
>
>-	if (num_pages == 1 && (mem->placement &
>TTM_PL_FLAG_CACHED)) {
>+	if (num_pages == 1 && ttm->caching == ttm_cached) {
> 		/*
> 		 * We're mapping a single page, and the desired
> 		 * page protection is consistent with the bo.
>diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
>index a465f51df027..3e5dd6271d4c 100644
>--- a/drivers/gpu/drm/ttm/ttm_tt.c
>+++ b/drivers/gpu/drm/ttm/ttm_tt.c
>@@ -114,35 +114,6 @@ static int ttm_sg_tt_alloc_page_directory(struct
>ttm_dma_tt *ttm)
> 	return 0;
> }
>
>-static int ttm_tt_set_caching(struct ttm_tt *ttm, enum ttm_caching caching)
>-{
>-	if (ttm->caching == caching)
>-		return 0;
>-
>-	/* Can't change the caching state after TT is populated */
>-	if (WARN_ON_ONCE(ttm_tt_is_populated(ttm)))
>-		return -EINVAL;
>-
>-	ttm->caching = caching;
>-
>-	return 0;
>-}
>-
>-int ttm_tt_set_placement_caching(struct ttm_tt *ttm, uint32_t placement)
>-{
>-	enum ttm_caching state;
>-
>-	if (placement & TTM_PL_FLAG_WC)
>-		state = ttm_write_combined;
>-	else if (placement & TTM_PL_FLAG_UNCACHED)
>-		state = ttm_uncached;
>-	else
>-		state = ttm_cached;
>-
>-	return ttm_tt_set_caching(ttm, state);
>-}
>-EXPORT_SYMBOL(ttm_tt_set_placement_caching);
>-
> void ttm_tt_destroy_common(struct ttm_bo_device *bdev, struct ttm_tt
>*ttm)
> {
> 	ttm_tt_unpopulate(bdev, ttm);
>diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
>b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
>index 2a7b5f964776..975d06c2e35e 100644
>--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
>+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
>@@ -34,28 +34,28 @@ static const struct ttm_place vram_placement_flags = {
> 	.fpfn = 0,
> 	.lpfn = 0,
> 	.mem_type = TTM_PL_VRAM,
>-	.flags = TTM_PL_FLAG_CACHED
>+	.flags = 0
> };
>
> static const struct ttm_place sys_placement_flags = {
> 	.fpfn = 0,
> 	.lpfn = 0,
> 	.mem_type = TTM_PL_SYSTEM,
>-	.flags = TTM_PL_FLAG_CACHED
>+	.flags = 0
> };
>
> static const struct ttm_place gmr_placement_flags = {
> 	.fpfn = 0,
> 	.lpfn = 0,
> 	.mem_type = VMW_PL_GMR,
>-	.flags = TTM_PL_FLAG_CACHED
>+	.flags = 0
> };
>
> static const struct ttm_place mob_placement_flags = {
> 	.fpfn = 0,
> 	.lpfn = 0,
> 	.mem_type = VMW_PL_MOB,
>-	.flags = TTM_PL_FLAG_CACHED
>+	.flags = 0
> };
>
> struct ttm_placement vmw_vram_placement = {
>@@ -70,12 +70,12 @@ static const struct ttm_place
>vram_gmr_placement_flags[] = {
> 		.fpfn = 0,
> 		.lpfn = 0,
> 		.mem_type = TTM_PL_VRAM,
>-		.flags = TTM_PL_FLAG_CACHED
>+		.flags = 0
> 	}, {
> 		.fpfn = 0,
> 		.lpfn = 0,
> 		.mem_type = VMW_PL_GMR,
>-		.flags = TTM_PL_FLAG_CACHED
>+		.flags = 0
> 	}
> };
>
>@@ -84,12 +84,12 @@ static const struct ttm_place
>gmr_vram_placement_flags[] = {
> 		.fpfn = 0,
> 		.lpfn = 0,
> 		.mem_type = VMW_PL_GMR,
>-		.flags = TTM_PL_FLAG_CACHED
>+		.flags = 0
> 	}, {
> 		.fpfn = 0,
> 		.lpfn = 0,
> 		.mem_type = TTM_PL_VRAM,
>-		.flags = TTM_PL_FLAG_CACHED
>+		.flags = 0
> 	}
> };
>
>@@ -119,22 +119,22 @@ static const struct ttm_place
>evictable_placement_flags[] = {
> 		.fpfn = 0,
> 		.lpfn = 0,
> 		.mem_type = TTM_PL_SYSTEM,
>-		.flags = TTM_PL_FLAG_CACHED
>+		.flags = 0
> 	}, {
> 		.fpfn = 0,
> 		.lpfn = 0,
> 		.mem_type = TTM_PL_VRAM,
>-		.flags = TTM_PL_FLAG_CACHED
>+		.flags = 0
> 	}, {
> 		.fpfn = 0,
> 		.lpfn = 0,
> 		.mem_type = VMW_PL_GMR,
>-		.flags = TTM_PL_FLAG_CACHED
>+		.flags = 0
> 	}, {
> 		.fpfn = 0,
> 		.lpfn = 0,
> 		.mem_type = VMW_PL_MOB,
>-		.flags = TTM_PL_FLAG_CACHED
>+		.flags = 0
> 	}
> };
>
>@@ -143,17 +143,17 @@ static const struct ttm_place
>nonfixed_placement_flags[] = {
> 		.fpfn = 0,
> 		.lpfn = 0,
> 		.mem_type = TTM_PL_SYSTEM,
>-		.flags = TTM_PL_FLAG_CACHED
>+		.flags = 0
> 	}, {
> 		.fpfn = 0,
> 		.lpfn = 0,
> 		.mem_type = VMW_PL_GMR,
>-		.flags = TTM_PL_FLAG_CACHED
>+		.flags = 0
> 	}, {
> 		.fpfn = 0,
> 		.lpfn = 0,
> 		.mem_type = VMW_PL_MOB,
>-		.flags = TTM_PL_FLAG_CACHED
>+		.flags = 0
> 	}
> };
>
>diff --git a/include/drm/ttm/ttm_placement.h
>b/include/drm/ttm/ttm_placement.h
>index 50e72df48b8d..aa6ba4d0cf78 100644
>--- a/include/drm/ttm/ttm_placement.h
>+++ b/include/drm/ttm/ttm_placement.h
>@@ -43,27 +43,13 @@
> #define TTM_PL_PRIV             3
>
> /*
>- * Other flags that affects data placement.
>- * TTM_PL_FLAG_CACHED indicates cache-coherent mappings
>- * if available.
>- * TTM_PL_FLAG_SHARED means that another application may
>- * reference the buffer.
>- * TTM_PL_FLAG_NO_EVICT means that the buffer may never
>- * be evicted to make room for other buffers.
>  * TTM_PL_FLAG_TOPDOWN requests to be placed from the
>  * top of the memory area, instead of the bottom.
>  */
>
>-#define TTM_PL_FLAG_CACHED      (1 << 16)
>-#define TTM_PL_FLAG_UNCACHED    (1 << 17)
>-#define TTM_PL_FLAG_WC          (1 << 18)
> #define TTM_PL_FLAG_CONTIGUOUS  (1 << 19)
> #define TTM_PL_FLAG_TOPDOWN     (1 << 22)

Do these bit shifts mean anything anymore?

This series is a nice cleanup.  For me it makes the caching mechanism a lot more clear.

If the removal of the ioremap() (comment above) is ok,:

Reviewed-by: Michael J. Ruhl <michael.j.ruhl@xxxxxxxxx>

m

>-#define TTM_PL_MASK_CACHING     (TTM_PL_FLAG_CACHED | \
>-				 TTM_PL_FLAG_UNCACHED | \
>-				 TTM_PL_FLAG_WC)
>-
> /**
>  * struct ttm_place
>  *
>diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
>index c39c722d5184..e042dec5e6c1 100644
>--- a/include/drm/ttm/ttm_tt.h
>+++ b/include/drm/ttm/ttm_tt.h
>@@ -164,21 +164,6 @@ void ttm_tt_destroy_common(struct ttm_bo_device
>*bdev, struct ttm_tt *ttm);
>  * Swap in a previously swap out ttm_tt.
>  */
> int ttm_tt_swapin(struct ttm_tt *ttm);
>-
>-/**
>- * ttm_tt_set_placement_caching:
>- *
>- * @ttm A struct ttm_tt the backing pages of which will change caching policy.
>- * @placement: Flag indicating the desired caching policy.
>- *
>- * This function will change caching policy of any default kernel mappings of
>- * the pages backing @ttm. If changing from cached to uncached or
>- * write-combined,
>- * all CPU caches will first be flushed to make sure the data of the pages
>- * hit RAM. This function may be very costly as it involves global TLB
>- * and cache flushes and potential page splitting / combining.
>- */
>-int ttm_tt_set_placement_caching(struct ttm_tt *ttm, uint32_t placement);
> int ttm_tt_swapout(struct ttm_bo_device *bdev, struct ttm_tt *ttm);
>
> /**
>--
>2.17.1
>
>_______________________________________________
>dri-devel mailing list
>dri-devel@xxxxxxxxxxxxxxxxxxxxx
>https://lists.freedesktop.org/mailman/listinfo/dri-devel
_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/dri-devel