Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
---
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 41 ++++++++++++++++++---
include/uapi/linux/kfd_ioctl.h | 6 +++
2 files changed, 41 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 9b8169761ec5..9b47657d5160 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -42,6 +42,7 @@ struct kfd_smi_client {
struct rcu_head rcu;
pid_t pid;
bool suser;
+ u32 drop_count;
};
#define KFD_MAX_KFIFO_SIZE 8192
@@ -103,12 +104,26 @@ static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
}
to_copy = min(size, to_copy);
ret = kfifo_out(&client->fifo, buf, to_copy);
- spin_unlock(&client->lock);
if (ret <= 0) {
+ spin_unlock(&client->lock);
ret = -EAGAIN;
goto ret_err;
}
+ if (client->drop_count) {
+ char msg[KFD_SMI_EVENT_MSG_SIZE];
+ int len;
+
+ len = snprintf(msg, sizeof(msg), "%x ", KFD_SMI_EVENT_DROPPED_EVENT);
+ len += snprintf(msg + len, sizeof(msg) - len,
+ KFD_EVENT_FMT_DROPPED_EVENT(ktime_get_boottime_ns(),
+ client->pid, client->drop_count));
+ kfifo_in(&client->fifo, msg, len);
+ client->drop_count = 0;
+ }
+
+ spin_unlock(&client->lock);
+
ret = copy_to_user(user, buf, to_copy);
if (ret) {
ret = -EFAULT;
@@ -173,22 +188,36 @@ static bool kfd_smi_ev_enabled(pid_t pid, struct kfd_smi_client *client,
}
static void add_event_to_kfifo(pid_t pid, struct kfd_node *dev,
- unsigned int smi_event, char *event_msg, int len)
+ unsigned int smi_event, char *event_msg, int event_len)
{
struct kfd_smi_client *client;
+ char msg[KFD_SMI_EVENT_MSG_SIZE];
+ int len = 0;
rcu_read_lock();
list_for_each_entry_rcu(client, &dev->smi_clients, list) {
if (!kfd_smi_ev_enabled(pid, client, smi_event))
continue;
+
spin_lock(&client->lock);
- if (kfifo_avail(&client->fifo) >= len) {
- kfifo_in(&client->fifo, event_msg, len);
+ if (client->drop_count) {
+ len = snprintf(msg, sizeof(msg), "%x ", KFD_SMI_EVENT_DROPPED_EVENT);
+ len += snprintf(msg + len, sizeof(msg) - len,
+ KFD_EVENT_FMT_DROPPED_EVENT(ktime_get_boottime_ns(), pid,
+ client->drop_count));
+ }
+
+ if (kfifo_avail(&client->fifo) >= event_len + len) {
+ if (len)
+ kfifo_in(&client->fifo, msg, len);
+ kfifo_in(&client->fifo, event_msg, event_len);
wake_up_all(&client->wait_queue);
+ client->drop_count = 0;
} else {
- pr_debug("smi_event(EventID: %u): no space left\n",
- smi_event);
+ client->drop_count++;
+ pr_debug("smi_event(EventID: %u): no space left drop_count %d\n",
+ smi_event, client->drop_count);
}
spin_unlock(&client->lock);
}
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index e4ed8fec3294..915d1e7c67fe 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -530,6 +530,7 @@ enum kfd_smi_event {
KFD_SMI_EVENT_QUEUE_EVICTION = 9,
KFD_SMI_EVENT_QUEUE_RESTORE = 10,
KFD_SMI_EVENT_UNMAP_FROM_GPU = 11,
+ KFD_SMI_EVENT_DROPPED_EVENT = 12,
/*
* max event number, as a flag bit to get events from all processes,
@@ -610,6 +611,7 @@ struct kfd_ioctl_smi_events_args {
* rw: 'W' for write page fault, 'R' for read page fault
* rescheduled: 'R' if the queue restore failed and rescheduled to try again
* error_code: migrate failure error code, 0 if no error
+ * drop_count: number of events dropped because the fifo was full
*/
#define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\
"%x %s\n", (reset_seq_num), (reset_cause)
@@ -645,6 +647,10 @@ struct kfd_ioctl_smi_events_args {
"%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\
(node), (unmap_trigger)
+#define KFD_EVENT_FMT_DROPPED_EVENT(ns, pid, drop_count)\
+ "%lld -%d %d\n", (ns), (pid), (drop_count)
+
+
/**************************************************************************************************
* CRIU IOCTLs (Checkpoint Restore In Userspace)
*