RE: [PATCH] drm/amdkfd: fix and enable debugging for gfx11

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[Public]

+ Felix (typo on email)

> -----Original Message-----
> From: Kim, Jonathan <Jonathan.Kim@xxxxxxx>
> Sent: Wednesday, June 7, 2023 1:27 PM
> To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Cc: Felix.Kueling@xxxxxxx; Huang, JinHuiEric <JinHuiEric.Huang@xxxxxxx>;
> Kim, Jonathan <Jonathan.Kim@xxxxxxx>
> Subject: [PATCH] drm/amdkfd: fix and enable debugging for gfx11
>
> There are a few fixes required to enable gfx11 debugging.
>
> First, ADD_QUEUE.trap_en is an inappropriate place to toggle
> a per-process register so move it to SET_SHADER_DEBUGGER.trap_en.
> When ADD_QUEUE.skip_process_ctx_clear is set, MES will prioritize
> the SET_SHADER_DEBUGGER.trap_en setting.
>
> Second, to preserve correct save/restore priviledged wave states
> in coordination with the trap enablement setting, resume suspended
> waves early in the disable call.
>
> Finally, displaced single stepping can cause non-fatal illegal
> instructions during process termination on debug disable.  To work
> around this, stall the waves prior to disable and allow clean
> up to happen naturally on process termination.
>
> NOTE: The AMDGPU_MES_VERSION_MASK check is a place holder as
> MES FW updates have been reviewed but is awaiting binary
> creation.  Once the binaries have been created, this check may
> be subject to change.
>
> Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c       |  5 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h       |  4 ++-
>  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c        |  1 +
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.c        | 31 ++++++++++++++-----
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  3 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_topology.c     | 12 ++++---
>  drivers/gpu/drm/amd/include/mes_v11_api_def.h |  1 +
>  7 files changed, 40 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> index 20cc3fffe921..95d69f9c7361 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> @@ -928,7 +928,8 @@ int amdgpu_mes_set_shader_debugger(struct
> amdgpu_device *adev,
>                               uint64_t process_context_addr,
>                               uint32_t spi_gdbg_per_vmid_cntl,
>                               const uint32_t *tcp_watch_cntl,
> -                             uint32_t flags)
> +                             uint32_t flags,
> +                             bool trap_en)
>  {
>       struct mes_misc_op_input op_input = {0};
>       int r;
> @@ -945,6 +946,8 @@ int amdgpu_mes_set_shader_debugger(struct
> amdgpu_device *adev,
>       memcpy(op_input.set_shader_debugger.tcp_watch_cntl,
> tcp_watch_cntl,
>
>       sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
>
> +     op_input.set_shader_debugger.trap_en = trap_en;
> +
>       amdgpu_mes_lock(&adev->mes);
>
>       r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index b5f5eed2b5ef..2d6ac30b7135 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -294,6 +294,7 @@ struct mes_misc_op_input {
>                       } flags;
>                       uint32_t spi_gdbg_per_vmid_cntl;
>                       uint32_t tcp_watch_cntl[4];
> +                     uint32_t trap_en;
>               } set_shader_debugger;
>       };
>  };
> @@ -361,7 +362,8 @@ int amdgpu_mes_set_shader_debugger(struct
> amdgpu_device *adev,
>                               uint64_t process_context_addr,
>                               uint32_t spi_gdbg_per_vmid_cntl,
>                               const uint32_t *tcp_watch_cntl,
> -                             uint32_t flags);
> +                             uint32_t flags,
> +                             bool trap_en);
>
>  int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
>                       int queue_type, int idx,
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index c4e3cb8d44de..1bdaa00c0b46 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -347,6 +347,7 @@ static int mes_v11_0_misc_op(struct amdgpu_mes
> *mes,
>               memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
>                               input->set_shader_debugger.tcp_watch_cntl,
>
>       sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
> +             misc_pkt.set_shader_debugger.trap_en = input-
> >set_shader_debugger.trap_en;
>               break;
>       default:
>               DRM_ERROR("unsupported misc op (%d) \n", input->op);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 125274445f43..e7bc07068eed 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -349,12 +349,30 @@ int kfd_dbg_set_mes_debug_mode(struct
> kfd_process_device *pdd)
>  {
>       uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd-
> >spi_dbg_launch_mode;
>       uint32_t flags = pdd->process->dbg_flags;
> +     bool sq_trap_en = !!spi_dbg_cntl;
>
>       if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
>               return 0;
>
> +     /*
> +      * For displaced single stepping, the debugger inserts s_trap
> instructions
> +      * from user space.
> +      * This can race with the CWSR workaround during a non-IOCTL
> disable and
> +      * cause non-fatal trailing SQ illegal instructions.
> +      * As a work around, stall waves indefinitely in this case as the
> process
> +      * queues will terminate anyways.  MES will automatically clean up
> the
> +      * SPI debug HW registers when all queues are unmapped.
> +      * IOCTL disable will not hit these cases as the program needs to be
> in a
> +      * non-continued state to make the disable call in the first place so
> +      * debugger insertion of s_trap in debug memory will never occur.
> +      */
> +     if (KFD_GC_VERSION(pdd->dev) >= IP_VERSION(11, 0, 0) &&
> +         KFD_GC_VERSION(pdd->dev) < IP_VERSION(12, 0, 0) &&
> +         !pdd->process->mm && !sq_trap_en)
> +             spi_dbg_cntl |= 0x1; /* Set
> SPI_GDBG_PER_VMID_CNTL.stall_vmid */
> +
>       return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd-
> >proc_ctx_gpu_addr, spi_dbg_cntl,
> -                                             pdd->watch_points, flags);
> +                                             pdd->watch_points, flags,
> sq_trap_en);
>  }
>
>  #define KFD_DEBUGGER_INVALID_WATCH_POINT_ID -1
> @@ -557,6 +575,10 @@ void kfd_dbg_trap_deactivate(struct kfd_process
> *target, bool unwind, int unwind
>
>       if (!unwind) {
>               uint32_t flags = 0;
> +             int resume_count = resume_queues(target, 0, NULL);
> +
> +             if (resume_count)
> +                     pr_debug("Resumed %d queues\n", resume_count);
>
>               cancel_work_sync(&target->debug_event_workarea);
>               kfd_dbg_clear_process_address_watch(target);
> @@ -598,13 +620,6 @@ void kfd_dbg_trap_deactivate(struct kfd_process
> *target, bool unwind, int unwind
>       }
>
>       kfd_dbg_set_workaround(target, false);
> -
> -     if (!unwind) {
> -             int resume_count = resume_queues(target, 0, NULL);
> -
> -             if (resume_count)
> -                     pr_debug("Resumed %d queues\n", resume_count);
> -     }
>  }
>
>  static void kfd_dbg_clean_exception_status(struct kfd_process *target)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 0c1be91a87c6..ff0a28760494 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -227,8 +227,7 @@ static int add_queue_mes(struct
> device_queue_manager *dqm, struct queue *q,
>       queue_input.tba_addr = qpd->tba_addr;
>       queue_input.tma_addr = qpd->tma_addr;
>       queue_input.trap_en = KFD_GC_VERSION(q->device) <
> IP_VERSION(11, 0, 0) ||
> -                           KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0,
> 0) ||
> -                           q->properties.is_dbg_wa;
> +                           KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0,
> 0);
>       queue_input.skip_process_ctx_clear = qpd->pqm->process-
> >debug_trap_enabled;
>
>       queue_type = convert_to_mes_queue_type(q->properties.type);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index faa7939f35bd..90b86a6ac7bd 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -1863,13 +1863,15 @@ static void
> kfd_topology_set_dbg_firmware_support(struct kfd_topology_device *de
>  {
>       bool firmware_supported = true;
>
> -     /*
> -      * FIXME: GFX11 FW currently not sufficient to deal with CWSR WA.
> -      * Updated FW with API changes coming soon.
> -      */
>       if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) &&
>                       KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) {
> -             firmware_supported = false;
> +             uint32_t mes_api_rev = (dev->gpu->adev->mes.sched_version
> &
> +
>       AMDGPU_MES_API_VERSION_MASK) >>
> +
>       AMDGPU_MES_API_VERSION_SHIFT;
> +             uint32_t mes_rev = dev->gpu->adev->mes.sched_version &
> +
>       AMDGPU_MES_VERSION_MASK;
> +
> +             firmware_supported = (mes_api_rev >= 14) && (mes_rev >=
> 64);
>               goto out;
>       }
>
> diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h
> b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
> index f3c15f18ddb5..0997e999416a 100644
> --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h
> +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
> @@ -575,6 +575,7 @@ struct SET_SHADER_DEBUGGER {
>       } flags;
>       uint32_t spi_gdbg_per_vmid_cntl;
>       uint32_t tcp_watch_cntl[4]; /* TCP_WATCHx_CNTL */
> +     uint32_t trap_en;
>  };
>
>  union MESAPI__MISC {
> --
> 2.25.1





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux