Re: [PATCH v3] drm/amdkfd: Handle errors from svm validate and map

Felix Kuehling <felix.kuehling@xxxxxxx> · Wed, 20 Sep 2023 10:35:25 -0400

On 2023-09-20 10:20, Philip Yang wrote:


On 2023-09-19 17:15, Felix Kuehling wrote:

On 2023-09-19 10:21, Philip Yang wrote:
If new range is splited to multiple pranges with max_svm_range_pages
alignment and added to update_list, svm validate and map should keep
going after error to make sure prange->mapped_to_gpu flag is up to date
for the whole range.

svm validate and map update set prange->mapped_to_gpu after mapping to
GPUs successfully, otherwise clear prange->mapped_to_gpu flag (for
update mapping case) instead of setting error flag, we can remove
the redundant error flag to simpliy code.

Refactor to remove goto and update prange->mapped_to_gpu flag inside
svm_range_lock, to guarant we always evict queues or unmap from GPUs if
there are invalid ranges.

After svm validate and map return error -EAGIN, the caller retry will
update the mapping for the whole range again.

Fixes: c22b04407097 ("drm/amdkfd: flag added to handle errors from 
svm validate and map")
Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
---
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 78 
+++++++++++++---------------
  drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  1 -
  2 files changed, 36 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 50c29fd844fb..4812f4ac5579 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -818,7 +818,7 @@ svm_range_is_same_attrs(struct kfd_process *p, 
struct svm_range *prange,
          }
      }
  -    return !prange->is_error_flag;
+    return true;
  }
    /**
@@ -1671,7 +1671,7 @@ static int svm_range_validate_and_map(struct 
mm_struct *mm,
        start = prange->start << PAGE_SHIFT;
      end = (prange->last + 1) << PAGE_SHIFT;
-    for (addr = start; addr < end && !r; ) {
+    for (addr = start; !r && addr < end; ) {
          struct hmm_range *hmm_range;
          struct vm_area_struct *vma;
          unsigned long next;
@@ -1680,62 +1680,55 @@ static int svm_range_validate_and_map(struct 
mm_struct *mm,
          bool readonly;
            vma = vma_lookup(mm, addr);
-        if (!vma) {
+        if (vma) {
+            readonly = !(vma->vm_flags & VM_WRITE);
+
+            next = min(vma->vm_end, end);
+            npages = (next - addr) >> PAGE_SHIFT;
+            WRITE_ONCE(p->svms.faulting_task, current);
+            r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, 
npages,
+                               readonly, owner, NULL,
+                               &hmm_range);
+            WRITE_ONCE(p->svms.faulting_task, NULL);
+            if (r) {
+                pr_debug("failed %d to get svm range pages\n", r);
+                if (r == -EBUSY)
+                    r = -EAGAIN;
+            }
+        } else {
              r = -EFAULT;
-            goto unreserve_out;
-        }
-        readonly = !(vma->vm_flags & VM_WRITE);
-
-        next = min(vma->vm_end, end);
-        npages = (next - addr) >> PAGE_SHIFT;
-        WRITE_ONCE(p->svms.faulting_task, current);
-        r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, 
npages,
-                           readonly, owner, NULL,
-                           &hmm_range);
-        WRITE_ONCE(p->svms.faulting_task, NULL);
-        if (r) {
-            pr_debug("failed %d to get svm range pages\n", r);
-            if (r == -EBUSY)
-                r = -EAGAIN;
-            goto unreserve_out;
          }
  -        offset = (addr - start) >> PAGE_SHIFT;
-        r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
-                      hmm_range->hmm_pfns);
-        if (r) {
-            pr_debug("failed %d to dma map range\n", r);
-            goto unreserve_out;
+        if (!r) {
+            offset = (addr - start) >> PAGE_SHIFT;
+            r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
+                          hmm_range->hmm_pfns);
+            if (r)
+                pr_debug("failed %d to dma map range\n", r);
          }
            svm_range_lock(prange);
-        if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+        if (!r && amdgpu_hmm_range_get_pages_done(hmm_range)) {
              pr_debug("hmm update the range, need validate again\n");
              r = -EAGAIN;
-            goto unlock_out;
          }
-        if (!list_empty(&prange->child_list)) {
+
+        if (!r && !list_empty(&prange->child_list)) {
              pr_debug("range split by unmap in parallel, validate 
again\n");
              r = -EAGAIN;
-            goto unlock_out;
          }
  -        r = svm_range_map_to_gpus(prange, offset, npages, readonly,
-                      ctx->bitmap, wait, flush_tlb);
+        if (!r)
+            r = svm_range_map_to_gpus(prange, offset, npages, 
readonly,
+                          ctx->bitmap, wait, flush_tlb);
  -unlock_out:
+        prange->mapped_to_gpu = !r;

I'm still concerned that this can update prange->mapped_to_gpu to 
"true" before the entire range has been successfully mapped. This 
could cause race conditions if someone looks at this variable while a 
validate_and_map is in progress. This would avoid such race conditions:

        if (!r && next == end)
            prange->mapped_to_gpu = true;

thanks, will also add else path for error handling, to exit the loop 
with correct flag.

          else if (r)

             prange->mapped_to_gpu = false;

I thought about that. I think the flag should be false going into the 
function. There should be no need to validate and map the range if it's 
already mapped and valid. So if anything, we should set the flag to 
false in the beginning.

Regards,
  Felix


Regards,

Philip

Regards,
  Felix


          svm_range_unlock(prange);
            addr = next;
      }
  -    if (addr == end)
-        prange->mapped_to_gpu = true;
-
-unreserve_out:
      svm_range_unreserve_bos(ctx);
-
-    prange->is_error_flag = !!r;
      if (!r)
          prange->validate_timestamp = ktime_get_boottime();
  @@ -2104,7 +2097,8 @@ svm_range_add(struct kfd_process *p, 
uint64_t start, uint64_t size,
          next = interval_tree_iter_next(node, start, last);
          next_start = min(node->last, last) + 1;
  -        if (svm_range_is_same_attrs(p, prange, nattr, attrs)) {
+        if (svm_range_is_same_attrs(p, prange, nattr, attrs) &&
+            prange->mapped_to_gpu) {
              /* nothing to do */
          } else if (node->start < start || node->last > last) {
              /* node intersects the update range and its attributes
@@ -3517,7 +3511,7 @@ svm_range_set_attr(struct kfd_process *p, 
struct mm_struct *mm,
      struct svm_range *next;
      bool update_mapping = false;
      bool flush_tlb;
-    int r = 0;
+    int r, ret = 0;
        pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
           p->pasid, &p->svms, start, start + size - 1, size);
@@ -3605,7 +3599,7 @@ svm_range_set_attr(struct kfd_process *p, 
struct mm_struct *mm,
  out_unlock_range:
          mutex_unlock(&prange->migrate_mutex);
          if (r)
-            break;
+            ret = r;
      }
        dynamic_svm_range_dump(svms);
@@ -3618,7 +3612,7 @@ svm_range_set_attr(struct kfd_process *p, 
struct mm_struct *mm,
      pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", 
p->pasid,
           &p->svms, start, start + size - 1, r);
  -    return r;
+    return ret ? ret : r;
  }
    static int
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index c216c8dd13c6..25f711905738 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -133,7 +133,6 @@ struct svm_range {
      DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
      DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
      bool                mapped_to_gpu;
-    bool                is_error_flag;
  };
    static inline void svm_range_lock(struct svm_range *prange)