On Tue, Nov 28, 2017 at 1:29 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote: > This can be used by KFD for debugging features, such as dumping > HQDs in debugfs. > > Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 71 ++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 80 +++++++++++++++++++++++ > drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 14 ++++ > 3 files changed, 165 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c > index 14333af..12feba8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c > @@ -105,8 +105,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, > uint32_t queue_id, uint32_t __user *wptr, > uint32_t wptr_shift, uint32_t wptr_mask, > struct mm_struct *mm); > +static int kgd_hqd_dump(struct kgd_dev *kgd, > + uint32_t pipe_id, uint32_t queue_id, > + uint32_t (**dump)[2], uint32_t *n_regs); > static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, > uint32_t __user *wptr, struct mm_struct *mm); > +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, > + uint32_t engine_id, uint32_t queue_id, > + uint32_t (**dump)[2], uint32_t *n_regs); > static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, > uint32_t pipe_id, uint32_t queue_id); > > @@ -178,6 +184,8 @@ static const struct kfd2kgd_calls kfd2kgd = { > .init_interrupts = kgd_init_interrupts, > .hqd_load = kgd_hqd_load, > .hqd_sdma_load = kgd_hqd_sdma_load, > + .hqd_dump = kgd_hqd_dump, > + .hqd_sdma_dump = kgd_hqd_sdma_dump, > .hqd_is_occupied = kgd_hqd_is_occupied, > .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, > .hqd_destroy = kgd_hqd_destroy, > @@ -376,6 +384,42 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, > return 0; > } > > +static int kgd_hqd_dump(struct kgd_dev *kgd, > + uint32_t pipe_id, uint32_t queue_id, > + uint32_t (**dump)[2], uint32_t *n_regs) > +{ > + struct amdgpu_device *adev = get_amdgpu_device(kgd); > + uint32_t i = 0, reg; > +#define HQD_N_REGS (35+4) > +#define DUMP_REG(addr) do { \ > + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ > + break; \ > + (*dump)[i][0] = (addr) << 2; \ > + (*dump)[i++][1] = RREG32(addr); \ > + } while (0) > + > + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); > + if (*dump == NULL) > + return -ENOMEM; > + > + acquire_queue(kgd, pipe_id, queue_id); > + > + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0); > + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1); > + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2); > + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3); > + > + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++) > + DUMP_REG(reg); > + > + release_queue(kgd); > + > + WARN_ON_ONCE(i != HQD_N_REGS); > + *n_regs = i; > + > + return 0; > +} > + > static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, > uint32_t __user *wptr, struct mm_struct *mm) > { > @@ -440,6 +484,33 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, > return 0; > } > > +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, > + uint32_t engine_id, uint32_t queue_id, > + uint32_t (**dump)[2], uint32_t *n_regs) > +{ > + struct amdgpu_device *adev = get_amdgpu_device(kgd); > + uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET + > + queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; > + uint32_t i = 0, reg; > +#undef HQD_N_REGS > +#define HQD_N_REGS (19+4) > + > + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); > + if (*dump == NULL) > + return -ENOMEM; > + > + for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) > + DUMP_REG(sdma_offset + reg); > + for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK; > + reg++) > + DUMP_REG(sdma_offset + reg); > + > + WARN_ON_ONCE(i != HQD_N_REGS); > + *n_regs = i; > + > + return 0; > +} > + > static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, > uint32_t pipe_id, uint32_t queue_id) > { > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c > index 1d989e4..b380495 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c > @@ -64,8 +64,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, > uint32_t queue_id, uint32_t __user *wptr, > uint32_t wptr_shift, uint32_t wptr_mask, > struct mm_struct *mm); > +static int kgd_hqd_dump(struct kgd_dev *kgd, > + uint32_t pipe_id, uint32_t queue_id, > + uint32_t (**dump)[2], uint32_t *n_regs); > static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, > uint32_t __user *wptr, struct mm_struct *mm); > +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, > + uint32_t engine_id, uint32_t queue_id, > + uint32_t (**dump)[2], uint32_t *n_regs); > static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, > uint32_t pipe_id, uint32_t queue_id); > static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); > @@ -137,6 +143,8 @@ static const struct kfd2kgd_calls kfd2kgd = { > .init_interrupts = kgd_init_interrupts, > .hqd_load = kgd_hqd_load, > .hqd_sdma_load = kgd_hqd_sdma_load, > + .hqd_dump = kgd_hqd_dump, > + .hqd_sdma_dump = kgd_hqd_sdma_dump, > .hqd_is_occupied = kgd_hqd_is_occupied, > .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, > .hqd_destroy = kgd_hqd_destroy, > @@ -365,6 +373,42 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, > return 0; > } > > +static int kgd_hqd_dump(struct kgd_dev *kgd, > + uint32_t pipe_id, uint32_t queue_id, > + uint32_t (**dump)[2], uint32_t *n_regs) > +{ > + struct amdgpu_device *adev = get_amdgpu_device(kgd); > + uint32_t i = 0, reg; > +#define HQD_N_REGS (54+4) > +#define DUMP_REG(addr) do { \ > + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ > + break; \ > + (*dump)[i][0] = (addr) << 2; \ > + (*dump)[i++][1] = RREG32(addr); \ > + } while (0) > + > + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); > + if (*dump == NULL) > + return -ENOMEM; > + > + acquire_queue(kgd, pipe_id, queue_id); > + > + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0); > + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1); > + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2); > + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3); > + > + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++) > + DUMP_REG(reg); > + > + release_queue(kgd); > + > + WARN_ON_ONCE(i != HQD_N_REGS); > + *n_regs = i; > + > + return 0; > +} > + > static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, > uint32_t __user *wptr, struct mm_struct *mm) > { > @@ -428,6 +472,42 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, > return 0; > } > > +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, > + uint32_t engine_id, uint32_t queue_id, > + uint32_t (**dump)[2], uint32_t *n_regs) > +{ > + struct amdgpu_device *adev = get_amdgpu_device(kgd); > + uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET + > + queue_id * KFD_VI_SDMA_QUEUE_OFFSET; > + uint32_t i = 0, reg; > +#undef HQD_N_REGS > +#define HQD_N_REGS (19+4+2+3+7) > + > + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); > + if (*dump == NULL) > + return -ENOMEM; > + > + for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) > + DUMP_REG(sdma_offset + reg); > + for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK; > + reg++) > + DUMP_REG(sdma_offset + reg); > + for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; > + reg++) > + DUMP_REG(sdma_offset + reg); > + for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG; > + reg++) > + DUMP_REG(sdma_offset + reg); > + for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL; > + reg++) > + DUMP_REG(sdma_offset + reg); > + > + WARN_ON_ONCE(i != HQD_N_REGS); > + *n_regs = i; > + > + return 0; > +} > + > static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, > uint32_t pipe_id, uint32_t queue_id) > { > diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h > index c6d4e64..fe3079a 100644 > --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h > +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h > @@ -131,6 +131,12 @@ struct tile_config { > * @hqd_sdma_load: Loads the SDMA mqd structure to a H/W SDMA hqd slot. > * used only for no HWS mode. > * > + * @hqd_dump: Dumps CPC HQD registers to an array of address-value pairs. > + * Array is allocated with kmalloc, needs to be freed with kfree by caller. > + * > + * @hqd_sdma_dump: Dumps SDMA HQD registers to an array of address-value pairs. > + * Array is allocated with kmalloc, needs to be freed with kfree by caller. > + * > * @hqd_is_occupies: Checks if a hqd slot is occupied. > * > * @hqd_destroy: Destructs and preempts the queue assigned to that hqd slot. > @@ -187,6 +193,14 @@ struct kfd2kgd_calls { > int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd, > uint32_t __user *wptr, struct mm_struct *mm); > > + int (*hqd_dump)(struct kgd_dev *kgd, > + uint32_t pipe_id, uint32_t queue_id, > + uint32_t (**dump)[2], uint32_t *n_regs); > + > + int (*hqd_sdma_dump)(struct kgd_dev *kgd, > + uint32_t engine_id, uint32_t queue_id, > + uint32_t (**dump)[2], uint32_t *n_regs); > + > bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address, > uint32_t pipe_id, uint32_t queue_id); > > -- > 2.7.4 > This patch is: Acked-by: Oded Gabbay <oded.gabbay at gmail.com>