Re: [PATCH v2 4/8] drm/amdkfd: Add GPU recoverable fault SMI event

Felix Kuehling <felix.kuehling@xxxxxxx> · Thu, 27 Jan 2022 18:15:48 -0500

Am 2022-01-20 um 18:13 schrieb Philip Yang:
Output the timestamps at which a GPU recoverable fault starts and ends, the
duration taken to recover the fault, whether a migration happened or only the
GPU page table was updated, the fault address, and whether it was a read or write fault.

Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
---
  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 48 +++++++++++++++++++++
  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h |  7 ++-
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c        | 17 ++++++--
  3 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 68c93701c5f7..080eba0d3be0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -266,6 +266,54 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
  	add_event_to_kfifo(0, dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
  }
  
+static bool kfd_smi_event_duration(struct kfd_dev *dev, uint64_t ts,
+				   uint64_t *duration)
+{
+	if (list_empty(&dev->smi_clients))
+		return false;

I'm not sure what this check has to do with the duration calculation. I 
think this should be checked in the caller.

But I'd actually prefer not to include the duration in the message 
because it's redundant. If you have both the start and end event in the 
log, the duration can be easily calculated from the time stamps by 
whatever tool is used to analyze or visualize the log. We do need a way 
to match the start event to the end event. I think the PID and address 
should be good enough for that.


+
+	*duration = ktime_get_boottime_ns() - ts;
+	return true;
+}
+
+void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid,
+				    unsigned long address, bool write_fault,
+				    uint64_t ts)
+{
+	char fifo_in[64];
+	int len;
+
+	if (list_empty(&dev->smi_clients))
+		return;
+
+	len = snprintf(fifo_in, sizeof(fifo_in), "%x %lld -%d @%lx(%x) %c\n",
+		       KFD_SMI_EVENT_PAGE_FAULT_START, ts, pid, address,
+		       dev->id, write_fault ? 'W' : 'R');
+
+	add_event_to_kfifo(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_START, fifo_in,
+			   len);
+}
+
+void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
+				  unsigned long address, bool migration,
+				  uint64_t ts)
+{
+	char fifo_in[64];
+	uint64_t duration;
+	int len;
+
+	if (!kfd_smi_event_duration(dev, ts, &duration))
+		return;
+
+	len = snprintf(fifo_in, sizeof(fifo_in),
+		       "%x %lld(%lld) -%d @%lx(%x) %c\n",
+		       KFD_SMI_EVENT_PAGE_FAULT_END, ktime_get_boottime_ns(),
+		       duration, pid, address, dev->id, migration ? 'M' : 'm');
+
+	add_event_to_kfifo(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_END, fifo_in,
+			   len);
+}
+
  int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
  {
  	struct kfd_smi_client *client;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index bffd0c32b060..7f70db914d2c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -28,5 +28,10 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
  void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
  					     uint64_t throttle_bitmask);
  void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
-
+void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid,
+				    unsigned long address, bool write_fault,
+				    uint64_t ts);
+void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
+				  unsigned long address, bool migration,
+				  uint64_t ts);
  #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 2d2cae05dbea..08b21f9759ea 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -32,6 +32,7 @@
  #include "kfd_priv.h"
  #include "kfd_svm.h"
  #include "kfd_migrate.h"
+#include "kfd_smi_events.h"
  
  #ifdef dev_fmt
  #undef dev_fmt
@@ -1596,7 +1597,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
  	svm_range_unreserve_bos(&ctx);
  
  	if (!r)
-		prange->validate_timestamp = ktime_to_us(ktime_get());
+		prange->validate_timestamp = ktime_get_boottime_ns();
  
  	return r;
  }
@@ -2665,11 +2666,12 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
  	struct svm_range_list *svms;
  	struct svm_range *prange;
  	struct kfd_process *p;
-	uint64_t timestamp;
+	uint64_t timestamp = ktime_get_boottime_ns();
  	int32_t best_loc;
  	int32_t gpuidx = MAX_GPU_INSTANCE;
  	bool write_locked = false;
  	struct vm_area_struct *vma;
+	bool migration = false;
  	int r = 0;
  
  	if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
@@ -2745,9 +2747,9 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
  		goto out_unlock_range;
  	}
  
-	timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
  	/* skip duplicate vm fault on different pages of same range */
-	if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
+	if (div_u64(timestamp -  prange->validate_timestamp, 1000000) <
+	    AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {

You can probably avoid the division if you redefine 
AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING to be in nanoseconds.

Regards,
  Felix


  		pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
  			 svms, prange->start, prange->last);
  		r = 0;
@@ -2783,7 +2785,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
  		 svms, prange->start, prange->last, best_loc,
  		 prange->actual_loc);
  
+	kfd_smi_event_page_fault_start(adev->kfd.dev, p->lead_thread->pid, addr,
+				       write_fault, timestamp);
+
  	if (prange->actual_loc != best_loc) {
+		migration = true;
  		if (best_loc) {
  			r = svm_migrate_to_vram(prange, best_loc, mm);
  			if (r) {
@@ -2812,6 +2818,9 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
  		pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
  			 r, svms, prange->start, prange->last);
  
+	kfd_smi_event_page_fault_end(adev->kfd.dev, p->lead_thread->pid, addr,
+				     migration, timestamp);
+
  out_unlock_range:
  	mutex_unlock(&prange->migrate_mutex);
  out_unlock_svms: