Re: [PATCH] drm/amdkfd: Increase the size of the memory reserved for the TBA

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 2024-02-23 14:05, Laurent Morichetti wrote:
In a future commit, the cwsr trap handler code size for gfx10.1 will
increase to slightly above the one page mark. Since the TMA does not
need to be page aligned, and only 2 pointers are stored in it, push
the TMA offset by 2 KiB and keep the TBA+TMA reserved memory size
at two pages.

Signed-off-by: Laurent Morichetti <laurent.morichetti@xxxxxxx>

Reviewed-by: Felix Kuehling <felix.kuehling@xxxxxxx>


---
  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 23 ++++++++++++++++-------
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |  6 +++---
  2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 4d399c0c8a57..041ec3de55e7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -466,34 +466,43 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
  {
  	if (cwsr_enable && kfd->device_info.supports_cwsr) {
  		if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
-			BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
+			BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex)
+					     > KFD_CWSR_TMA_OFFSET);
  			kfd->cwsr_isa = cwsr_trap_gfx8_hex;
  			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
  		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) {
-			BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE);
+			BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex)
+					     > KFD_CWSR_TMA_OFFSET);
  			kfd->cwsr_isa = cwsr_trap_arcturus_hex;
  			kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
  		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) {
-			BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE);
+			BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex)
+					     > KFD_CWSR_TMA_OFFSET);
  			kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
  			kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
  		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) {
-			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex) > PAGE_SIZE);
+			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex)
+					     > KFD_CWSR_TMA_OFFSET);
  			kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
  			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
  		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
-			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
+			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex)
+					     > KFD_CWSR_TMA_OFFSET);
  			kfd->cwsr_isa = cwsr_trap_gfx9_hex;
  			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
  		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) {
-			BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
+			BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex)
+					     > KFD_CWSR_TMA_OFFSET);
  			kfd->cwsr_isa = cwsr_trap_nv1x_hex;
  			kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
  		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) {
-			BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
+			BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex)
+					     > KFD_CWSR_TMA_OFFSET);
  			kfd->cwsr_isa = cwsr_trap_gfx10_hex;
  			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
  		} else {
+			/* The gfx11 cwsr trap handler must fit inside a single
+			   page. */
  			BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
  			kfd->cwsr_isa = cwsr_trap_gfx11_hex;
  			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 80320b8603fc..42d40560cd30 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -99,11 +99,11 @@
  /*
   * Size of the per-process TBA+TMA buffer: 2 pages
   *
- * The first page is the TBA used for the CWSR ISA code. The second
- * page is used as TMA for user-mode trap handler setup in daisy-chain mode.
+ * The first chunk is the TBA used for the CWSR ISA code. The second
+ * chunk is used as TMA for user-mode trap handler setup in daisy-chain mode.
   */
  #define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
-#define KFD_CWSR_TMA_OFFSET PAGE_SIZE
+#define KFD_CWSR_TMA_OFFSET (PAGE_SIZE + 2048)
#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \
  	(KFD_MAX_NUM_OF_PROCESSES *			\

base-commit: 420b2460a743b320868f30e407d4c4685958ea2c



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux