RE: [PATCH 3/5] drm/amdkfd: add xcc instance for debugger APIs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[Public]

> -----Original Message-----
> From: Huang, JinHuiEric <JinHuiEric.Huang@xxxxxxx>
> Sent: Wednesday, July 5, 2023 6:57 PM
> To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Cc: Kim, Jonathan <Jonathan.Kim@xxxxxxx>; Huang, JinHuiEric
> <JinHuiEric.Huang@xxxxxxx>
> Subject: [PATCH 3/5] drm/amdkfd: add xcc instance for debugger APIs
>
> Since GFX9 GPU has multiple xcc instances, this is to
> implement this change in KFD for debugger APIs.

This redefines the KGD calls in patch 1 so I think this patch and patch 1 can be squashed.
Spatial partitioning is a known requirement outside of debugging so I don't think there's a need to explicitly point this out for debugger updates in the description.
Some other inline comments ...

>
> Signed-off-by: Eric Huang <jinhuieric.huang@xxxxxxx>
> ---
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c    |  6 ++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c |  6 ++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c  | 12 ++++++++---
> -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h  | 13 +++++++++-
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c  |  6 ++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c   | 12 ++++++++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h   | 13 +++++++++--
> --
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.c              |  6 ++++--
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c   |  2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c  |  3 ++-
>  drivers/gpu/drm/amd/include/kgd_kfd_interface.h     | 12 ++++++++----
>  11 files changed, 61 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> index f3f7e0437447..c7f88bfa1976 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> @@ -126,7 +126,8 @@ static uint32_t
> kgd_gfx_aldebaran_set_address_watch(
>                                       uint32_t watch_address_mask,
>                                       uint32_t watch_id,
>                                       uint32_t watch_mode,
> -                                     uint32_t debug_vmid)
> +                                     uint32_t debug_vmid,
> +                                     uint32_t inst )
>  {
>       uint32_t watch_address_high;
>       uint32_t watch_address_low;
> @@ -163,7 +164,8 @@ static uint32_t
> kgd_gfx_aldebaran_set_address_watch(
>  }
>
>  static uint32_t kgd_gfx_aldebaran_clear_address_watch(struct
> amdgpu_device *adev,
> -                                                   uint32_t watch_id)
> +                                                   uint32_t watch_id,
> +                                                   uint32_t inst)

Why do we need to instance this on a 0 return?  I don't think we need to change the prototype here.

>  {
>       return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> index 3299e268f234..c0546db91579 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> @@ -454,7 +454,8 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch(
>                               uint32_t watch_address_mask,
>                               uint32_t watch_id,
>                               uint32_t watch_mode,
> -                             uint32_t debug_vmid)
> +                             uint32_t debug_vmid,
> +                             uint32_t inst)

Let's use the inst arg in this function instead of hardcoding it to 0.
You're already setting the GC instance to 0 in the caller in this patch anyways so we may as well use the arg here to give context as to why we've updated the prototypes.

>  {
>       uint32_t watch_address_high;
>       uint32_t watch_address_low;
> @@ -491,7 +492,8 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch(
>  }
>
>  static uint32_t kgd_gfx_v9_4_3_clear_address_watch(struct amdgpu_device
> *adev,
> -                             uint32_t watch_id)
> +                             uint32_t watch_id,
> +                             uint32_t inst)
>  {
>       return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 8ad7a7779e14..04daa8f9456b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -886,7 +886,8 @@ uint32_t kgd_gfx_v10_set_address_watch(struct
> amdgpu_device *adev,
>                                       uint32_t watch_address_mask,
>                                       uint32_t watch_id,
>                                       uint32_t watch_mode,
> -                                     uint32_t debug_vmid)
> +                                     uint32_t debug_vmid,
> +                                     uint32_t inst)
>  {
>       uint32_t watch_address_high;
>       uint32_t watch_address_low;
> @@ -942,7 +943,8 @@ uint32_t kgd_gfx_v10_set_address_watch(struct
> amdgpu_device *adev,
>  }
>
>  uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
> -                                     uint32_t watch_id)
> +                                     uint32_t watch_id,
> +                                     uint32_t inst)
>  {
>       uint32_t watch_address_cntl;
>
> @@ -968,7 +970,8 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct
> amdgpu_device *adev,
>   *     deq_retry_wait_time      -- Wait Count for Global Wave Syncs.
>   */
>  void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
> -                                     uint32_t *wait_times)
> +                                     uint32_t *wait_times,
> +                                     uint32_t inst)
>
>  {
>       *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0,
> mmCP_IQ_WAIT_TIME2));
> @@ -978,7 +981,8 @@ void
> kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
>                                               uint32_t wait_times,
>                                               uint32_t grace_period,
>                                               uint32_t *reg_offset,
> -                                             uint32_t *reg_data)
> +                                             uint32_t *reg_data,
> +                                             uint32_t inst)
>  {
>       *reg_data = wait_times;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> index e6b70196071a..ebe92c55ceed 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> @@ -44,12 +44,17 @@ uint32_t kgd_gfx_v10_set_address_watch(struct
> amdgpu_device *adev,
>                                       uint32_t watch_address_mask,
>                                       uint32_t watch_id,
>                                       uint32_t watch_mode,
> -                                     uint32_t debug_vmid);
> +                                     uint32_t debug_vmid,
> +                                     uint32_t inst);
>  uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
> -                                     uint32_t watch_id);
> -void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, uint32_t
> *wait_times);
> +                                     uint32_t watch_id,
> +                                     uint32_t inst);
> +void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
> +                             uint32_t *wait_times,
> +                             uint32_t inst);
>  void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device
> *adev,
>                                              uint32_t wait_times,
>                                              uint32_t grace_period,
>                                              uint32_t *reg_offset,
> -                                            uint32_t *reg_data);
> +                                            uint32_t *reg_data,
> +                                            uint32_t inst);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> index 91c3574ebed3..d5d0ca6a14d4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> @@ -743,7 +743,8 @@ static uint32_t kgd_gfx_v11_set_address_watch(struct
> amdgpu_device *adev,
>                                       uint32_t watch_address_mask,
>                                       uint32_t watch_id,
>                                       uint32_t watch_mode,
> -                                     uint32_t debug_vmid)
> +                                     uint32_t debug_vmid,
> +                                     uint32_t inst)
>  {
>       uint32_t watch_address_high;
>       uint32_t watch_address_low;
> @@ -780,7 +781,8 @@ static uint32_t kgd_gfx_v11_set_address_watch(struct
> amdgpu_device *adev,
>  }
>
>  static uint32_t kgd_gfx_v11_clear_address_watch(struct amdgpu_device
> *adev,
> -                                             uint32_t watch_id)
> +                                             uint32_t watch_id,
> +                                             uint32_t inst)
>  {
>       return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index 51d93fb13ea3..8164c499aeb2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -822,7 +822,8 @@ uint32_t kgd_gfx_v9_set_address_watch(struct
> amdgpu_device *adev,
>                                       uint32_t watch_address_mask,
>                                       uint32_t watch_id,
>                                       uint32_t watch_mode,
> -                                     uint32_t debug_vmid)
> +                                     uint32_t debug_vmid,
> +                                     uint32_t inst)
>  {
>       uint32_t watch_address_high;
>       uint32_t watch_address_low;
> @@ -878,7 +879,8 @@ uint32_t kgd_gfx_v9_set_address_watch(struct
> amdgpu_device *adev,
>  }
>
>  uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
> -                                     uint32_t watch_id)
> +                                     uint32_t watch_id,
> +                                     uint32_t inst)
>  {
>       uint32_t watch_address_cntl;
>
> @@ -903,7 +905,8 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct
> amdgpu_device *adev,
>   *     deq_retry_wait_time      -- Wait Count for Global Wave Syncs.
>   */
>  void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
> -                                     uint32_t *wait_times)
> +                                     uint32_t *wait_times,
> +                                     uint32_t inst)
>
>  {
>       *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0,

Same comment as GC 9.4.3 set_address_watch.  Let's read the proper instance here.

> mmCP_IQ_WAIT_TIME2));
> @@ -1100,7 +1103,8 @@ void
> kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
>               uint32_t wait_times,
>               uint32_t grace_period,
>               uint32_t *reg_offset,
> -             uint32_t *reg_data)
> +             uint32_t *reg_data,
> +             uint32_t inst)
>  {
>       *reg_data = wait_times;

Same comment.  Let's grab the proper instance offset.
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> index 5f54bff0db49..b3832ee79064 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> @@ -89,12 +89,17 @@ uint32_t kgd_gfx_v9_set_address_watch(struct
> amdgpu_device *adev,
>                                       uint32_t watch_address_mask,
>                                       uint32_t watch_id,
>                                       uint32_t watch_mode,
> -                                     uint32_t debug_vmid);
> +                                     uint32_t debug_vmid,
> +                                     uint32_t inst);
>  uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
> -                                     uint32_t watch_id);
> -void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t
> *wait_times);
> +                                     uint32_t watch_id,
> +                                     uint32_t inst);
> +void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
> +                             uint32_t *wait_times,
> +                             uint32_t inst);
>  void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device
> *adev,
>                                              uint32_t wait_times,
>                                              uint32_t grace_period,
>                                              uint32_t *reg_offset,
> -                                            uint32_t *reg_data);
> +                                            uint32_t *reg_data,
> +                                            uint32_t inst);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index fff3ccc04fa9..dcc49183364b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -427,7 +427,8 @@ int kfd_dbg_trap_clear_dev_address_watch(struct
> kfd_process_device *pdd,
>       amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
>       pdd->watch_points[watch_id] = pdd->dev->kfd2kgd-
> >clear_address_watch(
>                                                       pdd->dev->adev,
> -                                                     watch_id);
> +                                                     watch_id,
> +                                                     0);
>       amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
>
>       if (!pdd->dev->kfd->shared_resources.enable_mes)
> @@ -466,7 +467,8 @@ int kfd_dbg_trap_set_dev_address_watch(struct
> kfd_process_device *pdd,
>                               watch_address_mask,
>                               *watch_id,
>                               watch_mode,
> -                             pdd->dev->vm_info.last_vmid_kfd);
> +                             pdd->dev->vm_info.last_vmid_kfd,
> +                             0);
>       amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
>
>       if (!pdd->dev->kfd->shared_resources.enable_mes)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index f515cb8f30ca..a2bff3f01359 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -1621,7 +1621,7 @@ static int initialize_cpsch(struct
> device_queue_manager *dqm)
>
>       if (dqm->dev->kfd2kgd->get_iq_wait_times)
>               dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
> -                                     &dqm->wait_times);
> +                                     &dqm->wait_times, 0);

It's okay to hard code this to 0 on call in this patch as this patch is only declaring the instance requirements but I don't see a follow up patch to address the need to instance this properly by the caller (similar to your solution for address watch instance setting in patch 4).
Please implement spatial partitioning requirements on grace period setting as well.

>       return 0;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> index 29a2d0499b67..8fda16e6fee6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> @@ -298,7 +298,8 @@ static int pm_set_grace_period_v9(struct
> packet_manager *pm,
>                       pm->dqm->wait_times,
>                       grace_period,
>                       &reg_offset,
> -                     &reg_data);
> +                     &reg_data,
> +                     0);

Same comment as above.  We need to get to proper instance offset to set the grace period at some point.

Thanks,

Jon
>
>       if (grace_period == USE_DEFAULT_GRACE_PERIOD)
>               reg_data = pm->dqm->wait_times;
> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index d0df3381539f..006a9d6214e9 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -315,16 +315,20 @@ struct kfd2kgd_calls {
>                                       uint32_t watch_address_mask,
>                                       uint32_t watch_id,
>                                       uint32_t watch_mode,
> -                                     uint32_t debug_vmid);
> +                                     uint32_t debug_vmid,
> +                                     uint32_t inst);
>       uint32_t (*clear_address_watch)(struct amdgpu_device *adev,
> -                     uint32_t watch_id);
> +                     uint32_t watch_id,
> +                     uint32_t inst);
>       void (*get_iq_wait_times)(struct amdgpu_device *adev,
> -                     uint32_t *wait_times);
> +                     uint32_t *wait_times,
> +                     uint32_t inst);
>       void (*build_grace_period_packet_info)(struct amdgpu_device *adev,
>                       uint32_t wait_times,
>                       uint32_t grace_period,
>                       uint32_t *reg_offset,
> -                     uint32_t *reg_data);
> +                     uint32_t *reg_data,
> +                     uint32_t inst);
>       void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid,
>                       int *wave_cnt, int *max_waves_per_cu, uint32_t
> inst);
>       void (*program_trap_handler_settings)(struct amdgpu_device *adev,
> --
> 2.34.1





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux