Re: [PATCH v3] drm/amdgpu: Add EXT_COHERENT memory allocation flags

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I'd like to see checks for nonsensical flag combinations in the kernel mode driver as well. We can only make some of those combinations valid and meaningful in the future, if we implement well defined behaviour (i.e. return an error) now. Otherwise we risk breaking misbehaving user mode applications expecting a particular undefined behaviour later on.

Regards,
  Felix


On 2023-09-06 15:25, Yat Sin, David wrote:
[AMD Official Use Only - General]

Reviewed-by: David Yat Sin <David.YatSin@xxxxxxx>

-----Original Message-----
From: Kuehling, Felix <Felix.Kuehling@xxxxxxx>
Sent: Friday, July 28, 2023 4:00 PM
To: Francis, David <David.Francis@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx;
Yat Sin, David <David.YatSin@xxxxxxx>
Subject: Re: [PATCH v3] drm/amdgpu: Add EXT_COHERENT memory allocation
flags

On 2023-07-28 15:39, David Francis wrote:
These flags (for GEM and SVM allocations) allocate memory that allows
for system-scope atomic semantics.

On GFX943 these flags cause caches to be avoided on non-local memory.

On all other ASICs they are identical in functionality to the
equivalent COHERENT flags.

Corresponding Thunk patch is at
https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/pull/88

v3: changed name of flag

Signed-off-by: David Francis <David.Francis@xxxxxxx>
I made one comment on the user mode patch regarding the explicit handling
of invalid combinations of Uncached, Coherent, ExtCoherent flags. I'm not
sure what we agreed on any more. But I don't think we want to just leave it up
to chance. Other than that, this patch looks good to me.

Regards,
    Felix


---
   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  2 ++
   drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c      |  1 +
   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c           |  1 +
   drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c           |  1 +
   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c            |  5 ++++-
   drivers/gpu/drm/amd/amdkfd/kfd_svm.c             | 10 +++++++++-
   include/uapi/drm/amdgpu_drm.h                    | 10 +++++++++-
   include/uapi/linux/kfd_ioctl.h                   |  3 +++
   8 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d34c3ef8f3ed..a1ce261f2d06 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1738,6 +1738,8 @@ int
amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
     if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
             alloc_flags |= AMDGPU_GEM_CREATE_COHERENT;
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT)
+           alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENT;
     if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)
             alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 12210598e5b8..76b618735dc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -331,6 +331,7 @@ amdgpu_dma_buf_create_obj(struct drm_device
*dev,
struct dma_buf *dma_buf)

             flags |= other->flags &
(AMDGPU_GEM_CREATE_CPU_GTT_USWC |
                                      AMDGPU_GEM_CREATE_COHERENT
|
+
AMDGPU_GEM_CREATE_EXT_COHERENT |
AMDGPU_GEM_CREATE_UNCACHED);
     }

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 6b430e10d38e..301ffe30824f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -632,6 +632,7 @@ static void gmc_v10_0_get_vm_pte(struct
amdgpu_device *adev,
     }

     if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+                          AMDGPU_GEM_CREATE_EXT_COHERENT |
                            AMDGPU_GEM_CREATE_UNCACHED))
             *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
                      AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); diff --git
a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index a6ee0220db56..846894e212e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -540,6 +540,7 @@ static void gmc_v11_0_get_vm_pte(struct
amdgpu_device *adev,
     }

     if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+                          AMDGPU_GEM_CREATE_EXT_COHERENT |
                            AMDGPU_GEM_CREATE_UNCACHED))
             *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
                      AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); diff --git
a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 880460cd3239..92a623e130d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1183,7 +1183,8 @@ static void gmc_v9_0_get_coherence_flags(struct
amdgpu_device *adev,
   {
     struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
     bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
-   bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
+   bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
AMDGPU_GEM_CREATE_EXT_COHERENT);
+   bool ext_coherent = bo->flags &
AMDGPU_GEM_CREATE_EXT_COHERENT;
     bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
     struct amdgpu_vm *vm = mapping->bo_va->base.vm;
     unsigned int mtype_local, mtype;
@@ -1251,6 +1252,8 @@ static void gmc_v9_0_get_coherence_flags(struct
amdgpu_device *adev,
             snoop = true;
             if (uncached) {
                     mtype = MTYPE_UC;
+           } else if (ext_coherent) {
+                   mtype = is_local ? MTYPE_CC : MTYPE_UC;
             } else if (adev->flags & AMD_IS_APU) {
                     mtype = is_local ? mtype_local : MTYPE_NC;
             } else {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 1b50eae051a4..e9ffcfc5dedc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1155,7 +1155,8 @@ svm_range_get_pte_flags(struct kfd_node
*node,
     uint32_t mapping_flags = 0;
     uint64_t pte_flags;
     bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
-   bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
+   bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT |
KFD_IOCTL_SVM_FLAG_EXT_COHERENT);
+   bool ext_coherent = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENT;
     bool uncached = false; /*flags &
KFD_IOCTL_SVM_FLAG_UNCACHED;*/
     unsigned int mtype_local;

@@ -1203,6 +1204,13 @@ svm_range_get_pte_flags(struct kfd_node
*node,
             snoop = true;
             if (uncached) {
                     mapping_flags |= AMDGPU_VM_MTYPE_UC;
+           } else if (ext_coherent) {
+                   /* local HBM region close to partition */
+                   if (bo_node->adev == node->adev &&
+                       (!bo_node->xcp || !node->xcp || bo_node->xcp-
mem_id == node->xcp->mem_id))
+                           mapping_flags |= AMDGPU_VM_MTYPE_CC;
+                   else
+                           mapping_flags |= AMDGPU_VM_MTYPE_UC;
             } else if (domain == SVM_RANGE_VRAM_DOMAIN) {
                     /* local HBM region close to partition */
                     if (bo_node->adev == node->adev && diff --git
a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index
79b14828d542..629860dbc9ec 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -145,7 +145,7 @@ extern "C" {
    */
   #define AMDGPU_GEM_CREATE_DISCARDABLE             (1 << 12)
   /* Flag that BO is shared coherently between multiple devices or CPU
threads.
- * May depend on GPU instructions to flush caches explicitly
+ * May depend on GPU instructions to flush caches to system scope
explicitly.
    *
    * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs
and
    * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
@@ -158,6 +158,14 @@ extern "C" {
    * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
    */
   #define AMDGPU_GEM_CREATE_UNCACHED                (1 << 14)
+/* Flag that BO should be coherent across devices when using
+device-level
+ * atomics. May depend on GPU instructions to flush caches to device
+scope
+ * explicitly, promoting them to system scope automatically.
+ *
+ * This influences the choice of MTYPE in the PTEs on GFXv9 and later
+GPUs and
+ * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
+ */
+#define AMDGPU_GEM_CREATE_EXT_COHERENT             (1 << 15)

   struct drm_amdgpu_gem_create_in  {
     /** the requested memory size */
diff --git a/include/uapi/linux/kfd_ioctl.h
b/include/uapi/linux/kfd_ioctl.h index eeb2fdcbdcb7..f0ed68974c54
100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -405,6 +405,7 @@ struct kfd_ioctl_acquire_vm_args {
   #define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM     (1 << 27)
   #define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT  (1 << 26)
   #define KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED  (1 << 25)
+#define KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT       (1 << 24)

   /* Allocate memory for later SVM (shared virtual memory) mapping.
    *
@@ -659,6 +660,8 @@ enum kfd_mmio_remap {
   #define KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY     0x00000020
   /* Keep GPU memory mapping always valid as if XNACK is disable */
   #define KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED   0x00000040
+/* Fine grained coherency between all devices using device-scope atomics
*/
+#define KFD_IOCTL_SVM_FLAG_EXT_COHERENT        0x00000080

   /**
    * kfd_ioctl_svm_op - SVM ioctl operations



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux