On 2023-09-29 10:11, Philip Yang wrote:
Align unmap range start and last address to granularity boundary. Skip unmap if range is already unmapped from GPUs.
This only handles unmap due to MMU notifiers with XNACK on. What
about svm_range_unmap_from_cpu?
Regards,
Felix
This also solves the rocgdb CWSR migration-related issue. Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 35 ++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 626e0dd4ec79..ac65bf25c685 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2004,6 +2004,26 @@ static void svm_range_restore_work(struct work_struct *work) mmput(mm); } +static unsigned long +svm_range_align_start(struct svm_range *prange, unsigned long start) +{ + unsigned long start_align; + + start_align = ALIGN_DOWN(start, 1UL << prange->granularity); + start_align = max_t(unsigned long, start_align, prange->start); + return start_align; +} + +static unsigned long +svm_range_align_last(struct svm_range *prange, unsigned long last) +{ + unsigned long last_align; + + last_align = ALIGN(last, 1UL << prange->granularity) - 1;
I think this should be
last_align = ALIGN(last + 1, 1UL << prange->granularity) - 1;
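Otherwise you're off by one granule when last is already aligned to the
granule size, i.e. when (last & ((1UL << prange->granularity) - 1)) == 0:
ALIGN(last, ...) then returns last unchanged, so last_align becomes last - 1.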
+ last_align = min_t(unsigned long, last_align, prange->last); + return last_align; +} + /** * svm_range_evict - evict svm range * @prange: svm range structure @@ -2078,6 +2098,12 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, unsigned long s, l; uint32_t trigger; + if (!svm_range_partial_mapped(prange, start, last)) { + pr_debug("svms 0x%p [0x%lx 0x%lx] unmapped already\n", + prange->svms, start, last); + return 0; + } + if (event == MMU_NOTIFY_MIGRATE) trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE; else @@ -2085,16 +2111,17 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n", prange->svms, start, last); + list_for_each_entry(pchild, &prange->child_list, child_list) { mutex_lock_nested(&pchild->lock, 1); - s = max(start, pchild->start); - l = min(last, pchild->last); + s = svm_range_align_start(pchild, start); + l = svm_range_align_last(pchild, last); if (l >= s) svm_range_unmap_from_gpus(pchild, s, l, trigger); mutex_unlock(&pchild->lock); } - s = max(start, prange->start); - l = min(last, prange->last); + s = svm_range_align_start(prange, start); + l = svm_range_align_last(prange, last); if (l >= s) svm_range_unmap_from_gpus(prange, s, l, trigger); }