On 2021-06-21 12:04 p.m., Alex Sierra wrote:
actual_loc should not be used anymore, as pranges
could have mixed locations (VRAM & SYSRAM) at the
same time.
Signed-off-by: Alex Sierra <alex.sierra@xxxxxxx>
---
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 12 +---
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 71 ++++++++++--------------
2 files changed, 29 insertions(+), 54 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index f71f8d7e2b72..acb9f64577a0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -501,12 +501,6 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
struct amdgpu_device *adev;
int r = 0;
- if (prange->actual_loc == best_loc) {
- pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
- prange->svms, prange->start, prange->last, best_loc);
- return 0;
- }
-
adev = svm_range_get_adev_by_id(prange, best_loc);
if (!adev) {
pr_debug("failed to get device by id 0x%x\n", best_loc);
@@ -791,11 +785,7 @@ int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm)
{
- if (!prange->actual_loc)
- return svm_migrate_ram_to_vram(prange, best_loc, mm);
- else
- return svm_migrate_vram_to_vram(prange, best_loc, mm);
-
+ return svm_migrate_ram_to_vram(prange, best_loc, mm);
Can you remove svm_migrate_vram_to_vram in this case? I guess we're
now relying on the svm_range_prefault call in svm_migrate_ram_to_vram to
migrate VRAM in a different XGMI hive to system memory. But
eventually we want to get rid of that pre-fault hack.
}
/**
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 3b05bc270732..ebc1ae7e5193 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1421,42 +1421,38 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
svm_range_reserve_bos(&ctx);
- if (!prange->actual_loc) {
- p = container_of(prange->svms, struct kfd_process, svms);
- owner = kfd_svm_page_owner(p, find_first_bit(ctx.bitmap,
- MAX_GPU_INSTANCE));
- for_each_set_bit(idx, ctx.bitmap, MAX_GPU_INSTANCE) {
- if (kfd_svm_page_owner(p, idx) != owner) {
- owner = NULL;
- break;
- }
- }
- r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
- prange->start << PAGE_SHIFT,
- prange->npages, &hmm_range,
- false, true, owner);
- if (r) {
- pr_debug("failed %d to get svm range pages\n", r);
- goto unreserve_out;
- }
-
- r = svm_range_dma_map(prange, ctx.bitmap,
- hmm_range->hmm_pfns);
- if (r) {
- pr_debug("failed %d to dma map range\n", r);
- goto unreserve_out;
+ p = container_of(prange->svms, struct kfd_process, svms);
+ owner = kfd_svm_page_owner(p, find_first_bit(ctx.bitmap,
+ MAX_GPU_INSTANCE));
+ for_each_set_bit(idx, ctx.bitmap, MAX_GPU_INSTANCE) {
+ if (kfd_svm_page_owner(p, idx) != owner) {
+ owner = NULL;
+ break;
}
+ }
+ r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
+ prange->start << PAGE_SHIFT,
+ prange->npages, &hmm_range,
+ false, true, owner);
+ if (r) {
+ pr_debug("failed %d to get svm range pages\n", r);
+ goto unreserve_out;
+ }
- prange->validated_once = true;
+ r = svm_range_dma_map(prange, ctx.bitmap,
+ hmm_range->hmm_pfns);
+ if (r) {
+ pr_debug("failed %d to dma map range\n", r);
+ goto unreserve_out;
}
+ prange->validated_once = true;
+
svm_range_lock(prange);
- if (!prange->actual_loc) {
- if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
- pr_debug("hmm update the range, need validate again\n");
- r = -EAGAIN;
- goto unlock_out;
- }
+ if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+ pr_debug("hmm update the range, need validate again\n");
+ r = -EAGAIN;
+ goto unlock_out;
IMO, this is the most important part of this commit, and it should be
called out in the patch description. Here we use hmm_range_fault for
getting VRAM addresses. This is what enables mixed mappings in the first
place.
Regards,
Felix
}
if (!list_empty(&prange->child_list)) {
pr_debug("range split by unmap in parallel, validate again\n");
@@ -2741,20 +2737,9 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
*migrated = false;
best_loc = svm_range_best_prefetch_location(prange);
- if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
- best_loc == prange->actual_loc)
+ if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED)
return 0;
- /*
- * Prefetch to GPU without host access flag, set actual_loc to gpu, then
- * validate on gpu and map to gpus will be handled afterwards.
- */
- if (best_loc && !prange->actual_loc &&
- !(prange->flags & KFD_IOCTL_SVM_FLAG_HOST_ACCESS)) {
- prange->actual_loc = best_loc;
- return 0;
- }
-
if (!best_loc) {
r = svm_migrate_vram_to_ram(prange, mm);
*migrated = !r;
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx