From: Philip Yang <Philip.Yang@xxxxxxx> Use HMM to get system memory pages address, which will be used to map to GPUs or migrate to vram. Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 116 ++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 18 +++++ 2 files changed, 133 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 8f147fa4ff94..db72b767b958 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -29,6 +29,15 @@ #include "kfd_priv.h" #include "kfd_svm.h" +static bool +svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq); + +static const struct mmu_interval_notifier_ops svm_range_mn_ops = { + .invalidate = svm_range_cpu_invalidate_pagetables, +}; + /** * svm_range_unlink - unlink svm_range from lists and interval tree * @prange: svm range structure to be removed @@ -47,6 +56,18 @@ static void svm_range_unlink(struct svm_range *prange) interval_tree_remove(&prange->it_node, &prange->svms->objects); } +static void +svm_range_add_notifier_locked(struct mm_struct *mm, struct svm_range *prange) +{ + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, + prange, prange->start, prange->last); + + mmu_interval_notifier_insert_locked(&prange->notifier, mm, + prange->start << PAGE_SHIFT, + prange->npages << PAGE_SHIFT, + &svm_range_mn_ops); +} + /** * svm_range_add_to_svms - add svm range to svms * @prange: svm range structure to be added @@ -66,11 +87,24 @@ static void svm_range_add_to_svms(struct svm_range *prange) interval_tree_insert(&prange->it_node, &prange->svms->objects); } +static void svm_range_remove_notifier(struct svm_range *prange) +{ + pr_debug("remove notifier svms 0x%p prange 0x%p 
[0x%lx 0x%lx]\n", + prange->svms, prange, + prange->notifier.interval_tree.start >> PAGE_SHIFT, + prange->notifier.interval_tree.last >> PAGE_SHIFT); + + if (prange->notifier.interval_tree.start != 0 && + prange->notifier.interval_tree.last != 0) + mmu_interval_notifier_remove(&prange->notifier); +} + static void svm_range_free(struct svm_range *prange) { pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, prange->start, prange->last); + mutex_destroy(&prange->lock); kfree(prange); } @@ -103,6 +137,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, INIT_LIST_HEAD(&prange->update_list); INIT_LIST_HEAD(&prange->remove_list); INIT_LIST_HEAD(&prange->insert_list); + mutex_init(&prange->lock); svm_range_set_default_attributes(&prange->preferred_loc, &prange->prefetch_loc, &prange->granularity, &prange->flags); @@ -376,6 +411,65 @@ svm_range_split_head(struct svm_range *prange, struct svm_range *new, return r; } +/* + * Validation+GPU mapping with concurrent invalidation (MMU notifiers) + * + * To prevent concurrent destruction or change of range attributes, the + * svm_read_lock must be held. The caller must not hold the svm_write_lock + * because that would block concurrent evictions and lead to deadlocks. To + * serialize concurrent migrations or validations of the same range, the + * prange->migrate_mutex must be held. + * + * For VRAM ranges, the SVM BO must be allocated and valid (protected by its + * eviction fence). + * + * The following sequence ensures race-free validation and GPU mapping: + * + * 1. Reserve page table (and SVM BO if range is in VRAM) + * 2. hmm_range_fault to get page addresses (if system memory) + * 3. DMA-map pages (if system memory) + * 4-a. Take notifier lock + * 4-b. Check that pages still valid (mmu_interval_read_retry) + * 4-c. Check that the range was not split or otherwise invalidated + * 4-d. Update GPU page table + * 4-e. Release notifier lock + * 5. 
Release page table (and SVM BO) reservation + */ +static int svm_range_validate_and_map(struct mm_struct *mm, + struct svm_range *prange, + uint32_t gpuidx, bool intr, bool wait) +{ + struct hmm_range *hmm_range; + int r = 0; + + if (!prange->actual_loc) { + r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, + prange->start << PAGE_SHIFT, + prange->npages, &hmm_range, + false, true); + if (r) { + pr_debug("failed %d to get svm range pages\n", r); + goto unreserve_out; + } + } + + svm_range_lock(prange); + if (!prange->actual_loc) { + if (amdgpu_hmm_range_get_pages_done(hmm_range)) { + r = -EAGAIN; + goto unlock_out; + } + } + + /* TODO: map to GPU */ + +unlock_out: + svm_range_unlock(prange); +unreserve_out: + + return r; +} + static struct svm_range *svm_range_clone(struct svm_range *old) { struct svm_range *new; @@ -516,6 +610,18 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, return r; } +/** + * svm_range_cpu_invalidate_pagetables - interval notifier callback + * + */ +static bool +svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + return true; +} + void svm_range_list_fini(struct kfd_process *p) { mutex_destroy(&p->svms.lock); @@ -669,6 +775,7 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, /* Apply changes as a transaction */ list_for_each_entry_safe(prange, next, &insert_list, insert_list) { svm_range_add_to_svms(prange); + svm_range_add_notifier_locked(mm, prange); } list_for_each_entry(prange, &update_list, update_list) { svm_range_apply_attrs(p, prange, nattr, attrs); @@ -680,6 +787,7 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, prange->svms, prange, prange->start, prange->last); svm_range_unlink(prange); + svm_range_remove_notifier(prange); svm_range_free(prange); } @@ -690,7 +798,13 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, * case 
because the rollback wouldn't be guaranteed to work either. */ list_for_each_entry(prange, &update_list, update_list) { - /* TODO */ + r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, + true, true); + if (r) { + pr_debug("failed %d to map 0x%lx to gpus\n", r, + prange->start); + break; + } } svm_range_debug_dump(svms); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 537101dd19a6..10c73348c2c0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -46,11 +46,14 @@ * @remove_list:link list node used to add to remove list * @insert_list:link list node used to add to insert list * @npages: number of pages + * @lock: protect prange start, last, child_list, svm_bo_list + * @saved_flags:save/restore current PF_MEMALLOC flags * @flags: flags defined as KFD_IOCTL_SVM_FLAG_* * @perferred_loc: perferred location, 0 for CPU, or GPU id * @perfetch_loc: last prefetch location, 0 for CPU, or GPU id * @actual_loc: the actual location, 0 for CPU, or GPU id * @granularity:migration granularity, log2 num pages + * @notifier: register mmu interval notifier * @bitmap_access: index bitmap of GPUs which can access the range * @bitmap_aip: index bitmap of GPUs which can access the range in place * @@ -68,15 +71,30 @@ struct svm_range { struct list_head remove_list; struct list_head insert_list; uint64_t npages; + struct mutex lock; + unsigned int saved_flags; uint32_t flags; uint32_t preferred_loc; uint32_t prefetch_loc; uint32_t actual_loc; uint8_t granularity; + struct mmu_interval_notifier notifier; DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE); DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE); }; +static inline void svm_range_lock(struct svm_range *prange) +{ + mutex_lock(&prange->lock); + prange->saved_flags = memalloc_noreclaim_save(); + +} +static inline void svm_range_unlock(struct svm_range *prange) +{ + memalloc_noreclaim_restore(prange->saved_flags); + mutex_unlock(&prange->lock); 
+} + int svm_range_list_init(struct kfd_process *p); void svm_range_list_fini(struct kfd_process *p); int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start, -- 2.31.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx