Add support for reporting GPU reset events through SMI. Signed-off-by: Mukul Joshi <mukul.joshi@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 ++ drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 18 ++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h | 1 + include/uapi/linux/kfd_ioctl.h | 1 + 4 files changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index d5e790f046b4..d788aa24ef3f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -811,6 +811,8 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd) if (!kfd->init_complete) return 0; + kfd_smi_event_update_gpu_reset(kfd); + kfd->dqm->ops.pre_reset(kfd->dqm); kgd2kfd_suspend(kfd, false); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index 4d4b6e3ab697..4de57923d9f5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -174,6 +174,24 @@ static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event, rcu_read_unlock(); } +void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev) +{ + /* + * GpuReset msg = empty + * 1 byte event + 1 byte space + 1 byte \n + 1 byte \0 = 4 + */ + char fifo_in[4]; + int len; + + if (list_empty(&dev->smi_clients)) { + return; + } + + len = snprintf(fifo_in, 4, "%x \n", KFD_SMI_EVENT_GPU_RESET); + + add_event_to_kfifo(dev, KFD_SMI_EVENT_GPU_RESET, fifo_in, len); +} + void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, uint32_t throttle_bitmask) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h index 15537b2cccb5..ffdb822d120b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h @@ -27,5 +27,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd); void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid); void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, uint32_t throttle_bitmask); +void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev); #endif diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index cb1f963a84e0..128b6235b540 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -453,6 +453,7 @@ enum kfd_smi_event { KFD_SMI_EVENT_NONE = 0, /* not used */ KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */ KFD_SMI_EVENT_THERMAL_THROTTLE = 2, + KFD_SMI_EVENT_GPU_RESET = 3, }; #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx