After a migration finishes, output an SMI event containing the timestamp when the migration started, the duration of the migration, the svm range address and size, the GPU ids of the migration source and destination, the svm range attributes, and the migration trigger. The trigger can be prefetch, CPU page fault, GPU page fault, or TTM eviction. Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 67 ++++++++++++++------- drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 5 +- drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 24 ++++++++ drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h | 5 ++ drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 16 +++-- 5 files changed, 86 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index d5d2cf2ee788..eb12f1c3c3fa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -32,6 +32,7 @@ #include "kfd_priv.h" #include "kfd_svm.h" #include "kfd_migrate.h" +#include "kfd_smi_events.h" #ifdef dev_fmt #undef dev_fmt @@ -405,10 +406,11 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, static long svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, struct vm_area_struct *vma, uint64_t start, - uint64_t end) + uint64_t end, uint32_t trigger) { uint64_t npages = (end - start) >> PAGE_SHIFT; - struct kfd_process_device *pdd; + struct kfd_process_device *pdd = NULL; + uint64_t timestamp = ktime_get_ns(); struct dma_fence *mfence = NULL; struct migrate_vma migrate; unsigned long cpages = 0; @@ -434,6 +436,10 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, migrate.dst = migrate.src + npages; scratch = (dma_addr_t *)(migrate.dst + npages); + pdd = svm_range_get_pdd_by_adev(prange, adev); + if (!pdd) + goto out_free; + r = migrate_vma_setup(&migrate); if (r) { dev_err(adev->dev, "vma setup fail %d range [0x%lx 0x%lx]\n", r, @@ -462,6 +468,11 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct
svm_range *prange, svm_migrate_copy_done(adev, mfence); migrate_vma_finalize(&migrate); + kfd_smi_event_migration(adev->kfd.dev, pdd->process->pasid, + start >> PAGE_SHIFT, end >> PAGE_SHIFT, + 0, adev->kfd.dev->id, prange->prefetch_loc, + prange->preferred_loc, trigger, timestamp); + svm_range_dma_unmap(adev->dev, scratch, 0, npages); svm_range_free_dma_mappings(prange); @@ -469,10 +480,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, kvfree(buf); out: if (!r && cpages) { - pdd = svm_range_get_pdd_by_adev(prange, adev); - if (pdd) - WRITE_ONCE(pdd->page_in, pdd->page_in + cpages); - + WRITE_ONCE(pdd->page_in, pdd->page_in + cpages); return cpages; } return r; @@ -483,6 +491,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, * @prange: range structure * @best_loc: the device to migrate to * @mm: the process mm structure + * @trigger: reason of migration * * Context: Process context, caller hold mmap read lock, svms lock, prange lock * @@ -491,7 +500,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, */ static int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct *mm) + struct mm_struct *mm, uint32_t trigger) { unsigned long addr, start, end; struct vm_area_struct *vma; @@ -528,7 +537,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, break; next = min(vma->vm_end, end); - r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next); + r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger); if (r < 0) { pr_debug("failed %ld to migrate\n", r); break; @@ -644,12 +653,14 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, static long svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, - struct vm_area_struct *vma, uint64_t start, uint64_t end) + struct vm_area_struct *vma, uint64_t start, uint64_t end, + uint32_t trigger) { uint64_t npages = (end - 
start) >> PAGE_SHIFT; + uint64_t timestamp = ktime_get_ns(); unsigned long upages = npages; unsigned long cpages = 0; - struct kfd_process_device *pdd; + struct kfd_process_device *pdd = NULL; struct dma_fence *mfence = NULL; struct migrate_vma migrate; dma_addr_t *scratch; @@ -674,6 +685,10 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, migrate.dst = migrate.src + npages; scratch = (dma_addr_t *)(migrate.dst + npages); + pdd = svm_range_get_pdd_by_adev(prange, adev); + if (!pdd) + goto out_free; + r = migrate_vma_setup(&migrate); if (r) { dev_err(adev->dev, "vma setup fail %d range [0x%lx 0x%lx]\n", r, @@ -704,16 +719,19 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, svm_migrate_copy_done(adev, mfence); migrate_vma_finalize(&migrate); + + kfd_smi_event_migration(adev->kfd.dev, pdd->process->pasid, + start >> PAGE_SHIFT, end >> PAGE_SHIFT, + adev->kfd.dev->id, 0, prange->prefetch_loc, + prange->preferred_loc, trigger, timestamp); + svm_range_dma_unmap(adev->dev, scratch, 0, npages); out_free: kvfree(buf); out: if (!r && cpages) { - pdd = svm_range_get_pdd_by_adev(prange, adev); - if (pdd) - WRITE_ONCE(pdd->page_out, pdd->page_out + cpages); - + WRITE_ONCE(pdd->page_out, pdd->page_out + cpages); return upages; } return r ? 
r : upages; @@ -723,13 +741,15 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, * svm_migrate_vram_to_ram - migrate svm range from device to system * @prange: range structure * @mm: process mm, use current->mm if NULL + * @trigger: reason of migration * * Context: Process context, caller hold mmap read lock, svms lock, prange lock * * Return: * 0 - OK, otherwise error code */ -int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) +int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, + uint32_t trigger) { struct amdgpu_device *adev; struct vm_area_struct *vma; @@ -767,7 +787,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) break; next = min(vma->vm_end, end); - r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next); + r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next, trigger); if (r < 0) { pr_debug("failed %ld to migrate\n", r); break; @@ -790,6 +810,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) * @prange: range structure * @best_loc: the device to migrate to * @mm: process mm, use current->mm if NULL + * @trigger: reason of migration * * Context: Process context, caller hold mmap read lock, svms lock, prange lock * @@ -798,7 +819,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) */ static int svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct *mm) + struct mm_struct *mm, uint32_t trigger) { int r, retries = 3; @@ -810,7 +831,7 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc); do { - r = svm_migrate_vram_to_ram(prange, mm); + r = svm_migrate_vram_to_ram(prange, mm, trigger); if (r) return r; } while (prange->actual_loc && --retries); @@ -818,17 +839,17 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, if (prange->actual_loc) return 
-EDEADLK; - return svm_migrate_ram_to_vram(prange, best_loc, mm); + return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger); } int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct *mm) + struct mm_struct *mm, uint32_t trigger) { if (!prange->actual_loc) - return svm_migrate_ram_to_vram(prange, best_loc, mm); + return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger); else - return svm_migrate_vram_to_vram(prange, best_loc, mm); + return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger); } @@ -897,7 +918,7 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) goto out_unlock_prange; } - r = svm_migrate_vram_to_ram(prange, mm); + r = svm_migrate_vram_to_ram(prange, mm, MIGRATION_TRIGGER_PAGEFAULT_CPU); if (r) pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r, prange, prange->start, prange->last); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index 2f5b3394c9ed..b3f0754b32fa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -41,8 +41,9 @@ enum MIGRATION_COPY_DIR { }; int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct *mm); -int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm); + struct mm_struct *mm, uint32_t trigger); +int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, + uint32_t trigger); unsigned long svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index 6ed3d85348d6..7996438377bc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -286,6 +286,30 @@ static bool kfd_smi_event_pid_duration(struct kfd_dev *dev, uint16_t pasid, return true; } +void kfd_smi_event_migration(struct kfd_dev *dev, uint16_t pasid, + unsigned long start, 
unsigned long end, + uint32_t from, uint32_t to, + uint32_t prefetch_loc, uint32_t preferred_loc, + uint32_t trigger, uint64_t ts) +{ + char fifo_in[256]; + uint64_t duration; + pid_t pid; + int len; + + if (!kfd_smi_event_pid_duration(dev, pasid, &pid, ts, &duration)) + return; + + len = snprintf(fifo_in, sizeof(fifo_in), "%d ts=%lld duration=%lld" + " pid=%d pfn=0x%lx npages=0x%lx from=0x%x to=0x%x" + " prefetch_loc=0x%x preferred_loc=0x%x trigger=%d\n", + KFD_SMI_EVENT_MIGRATION, ts, duration, pid, start, + end - start, from, to, prefetch_loc, preferred_loc, + trigger); + + add_event_to_kfifo(pid, dev, KFD_SMI_EVENT_MIGRATION, fifo_in, len); +} + void kfd_smi_event_page_fault(struct kfd_dev *dev, uint16_t pasid, unsigned long address, bool migration, bool write_fault, uint64_t ts) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h index fa3a8fdad69f..7dcc66ac8798 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h @@ -28,6 +28,11 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid); void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, uint64_t throttle_bitmask); void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset); +void kfd_smi_event_migration(struct kfd_dev *dev, uint16_t pasid, + unsigned long start, unsigned long end, + uint32_t from, uint32_t to, + uint32_t prefetch_loc, uint32_t preferred_loc, + uint32_t trigger, uint64_t ts); void kfd_smi_event_page_fault(struct kfd_dev *dev, uint16_t pasid, unsigned long address, bool migration, bool write_fault, uint64_t ts); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index b81667162dc1..d7dcc443c16f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2780,7 +2780,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, if (prange->actual_loc != 
best_loc) { migration = true; if (best_loc) { - r = svm_migrate_to_vram(prange, best_loc, mm); + r = svm_migrate_to_vram(prange, best_loc, mm, + MIGRATION_TRIGGER_PAGEFAULT); if (r) { pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n", r, addr); @@ -2788,12 +2789,14 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, * VRAM failed */ if (prange->actual_loc) - r = svm_migrate_vram_to_ram(prange, mm); + r = svm_migrate_vram_to_ram(prange, mm, + MIGRATION_TRIGGER_PAGEFAULT); else r = 0; } } else { - r = svm_migrate_vram_to_ram(prange, mm); + r = svm_migrate_vram_to_ram(prange, mm, + MIGRATION_TRIGGER_PAGEFAULT); } if (r) { pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n", @@ -3115,12 +3118,12 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, return 0; if (!best_loc) { - r = svm_migrate_vram_to_ram(prange, mm); + r = svm_migrate_vram_to_ram(prange, mm, MIGRATION_TRIGGER_PREFETCH); *migrated = !r; return r; } - r = svm_migrate_to_vram(prange, best_loc, mm); + r = svm_migrate_to_vram(prange, best_loc, mm, MIGRATION_TRIGGER_PREFETCH); *migrated = !r; return r; @@ -3177,7 +3180,8 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) mutex_lock(&prange->migrate_mutex); do { svm_migrate_vram_to_ram(prange, - svm_bo->eviction_fence->mm); + svm_bo->eviction_fence->mm, + MIGRATION_TRIGGER_TTM_EVICTION); } while (prange->actual_loc && --retries); WARN(prange->actual_loc, "Migration failed during eviction"); -- 2.17.1