Re: [PATCH v3] drm/amdgpu: Add EXT_COHERENT memory allocation flags

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2023-07-28 15:39, David Francis wrote:
These flags (for GEM and SVM allocations) allocate
memory that allows for system-scope atomic semantics.

On GFX943 these flags cause caches to be avoided on
non-local memory.

On all other ASICs they are identical in functionality to the
equivalent COHERENT flags.

Corresponding Thunk patch is at
https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/pull/88

v3: changed name of flag

Signed-off-by: David Francis <David.Francis@xxxxxxx>

I made one comment on the user mode patch regarding the explicit handling of invalid combinations of Uncached, Coherent, ExtCoherent flags. I'm not sure what we agreed on any more. But I don't think we want to just leave it up to chance. Other than that, this patch looks good to me.

Regards,
  Felix


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c      |  1 +
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c           |  1 +
  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c           |  1 +
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c            |  5 ++++-
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c             | 10 +++++++++-
  include/uapi/drm/amdgpu_drm.h                    | 10 +++++++++-
  include/uapi/linux/kfd_ioctl.h                   |  3 +++
  8 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d34c3ef8f3ed..a1ce261f2d06 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1738,6 +1738,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
  		alloc_flags |= AMDGPU_GEM_CREATE_COHERENT;
+	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT)
+		alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENT;
  	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)
  		alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 12210598e5b8..76b618735dc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -331,6 +331,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
flags |= other->flags & (AMDGPU_GEM_CREATE_CPU_GTT_USWC |
  					 AMDGPU_GEM_CREATE_COHERENT |
+					 AMDGPU_GEM_CREATE_EXT_COHERENT |
  					 AMDGPU_GEM_CREATE_UNCACHED);
  	}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 6b430e10d38e..301ffe30824f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -632,6 +632,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
  	}
if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+			       AMDGPU_GEM_CREATE_EXT_COHERENT |
  			       AMDGPU_GEM_CREATE_UNCACHED))
  		*flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
  			 AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index a6ee0220db56..846894e212e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -540,6 +540,7 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
  	}
if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+			       AMDGPU_GEM_CREATE_EXT_COHERENT |
  			       AMDGPU_GEM_CREATE_UNCACHED))
  		*flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
  			 AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 880460cd3239..92a623e130d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1183,7 +1183,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
  {
  	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
  	bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
-	bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
+	bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT | AMDGPU_GEM_CREATE_EXT_COHERENT);
+	bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT;
  	bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
  	struct amdgpu_vm *vm = mapping->bo_va->base.vm;
  	unsigned int mtype_local, mtype;
@@ -1251,6 +1252,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
  		snoop = true;
  		if (uncached) {
  			mtype = MTYPE_UC;
+		} else if (ext_coherent) {
+			mtype = is_local ? MTYPE_CC : MTYPE_UC;
  		} else if (adev->flags & AMD_IS_APU) {
  			mtype = is_local ? mtype_local : MTYPE_NC;
  		} else {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 1b50eae051a4..e9ffcfc5dedc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1155,7 +1155,8 @@ svm_range_get_pte_flags(struct kfd_node *node,
  	uint32_t mapping_flags = 0;
  	uint64_t pte_flags;
  	bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
-	bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
+	bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENT);
+	bool ext_coherent = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENT;
  	bool uncached = false; /*flags & KFD_IOCTL_SVM_FLAG_UNCACHED;*/
  	unsigned int mtype_local;
@@ -1203,6 +1204,13 @@ svm_range_get_pte_flags(struct kfd_node *node,
  		snoop = true;
  		if (uncached) {
  			mapping_flags |= AMDGPU_VM_MTYPE_UC;
+		} else if (ext_coherent) {
+			/* local HBM region close to partition */
+			if (bo_node->adev == node->adev &&
+			    (!bo_node->xcp || !node->xcp || bo_node->xcp->mem_id == node->xcp->mem_id))
+				mapping_flags |= AMDGPU_VM_MTYPE_CC;
+			else
+				mapping_flags |= AMDGPU_VM_MTYPE_UC;
  		} else if (domain == SVM_RANGE_VRAM_DOMAIN) {
  			/* local HBM region close to partition */
  			if (bo_node->adev == node->adev &&
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 79b14828d542..629860dbc9ec 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -145,7 +145,7 @@ extern "C" {
   */
  #define AMDGPU_GEM_CREATE_DISCARDABLE		(1 << 12)
  /* Flag that BO is shared coherently between multiple devices or CPU threads.
- * May depend on GPU instructions to flush caches explicitly
+ * May depend on GPU instructions to flush caches to system scope explicitly.
   *
   * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
   * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
@@ -158,6 +158,14 @@ extern "C" {
   * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
   */
  #define AMDGPU_GEM_CREATE_UNCACHED		(1 << 14)
+/* Flag that BO should be coherent across devices when using device-level
+ * atomics. May depend on GPU instructions to flush caches to device scope
+ * explicitly, promoting them to system scope automatically.
+ *
+ * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
+ * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
+ */
+#define AMDGPU_GEM_CREATE_EXT_COHERENT		(1 << 15)
struct drm_amdgpu_gem_create_in {
  	/** the requested memory size */
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index eeb2fdcbdcb7..f0ed68974c54 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -405,6 +405,7 @@ struct kfd_ioctl_acquire_vm_args {
  #define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM	(1 << 27)
  #define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT	(1 << 26)
  #define KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED	(1 << 25)
+#define KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT	(1 << 24)
/* Allocate memory for later SVM (shared virtual memory) mapping.
   *
@@ -659,6 +660,8 @@ enum kfd_mmio_remap {
  #define KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY     0x00000020
  /* Keep GPU memory mapping always valid as if XNACK is disable */
  #define KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED   0x00000040
+/* Fine grained coherency between all devices using device-scope atomics */
+#define KFD_IOCTL_SVM_FLAG_EXT_COHERENT        0x00000080
/**
   * kfd_ioctl_svm_op - SVM ioctl operations



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux