Replace prange->mapped_to_gpu with prange->bitmap_mapped[], which is per GPU flag and based on prange granularity, updated when map to GPUS or unmap from GPUs, to optimize multiple GPU map, unmap and retry fault recover. svm_range_partial_mapped is false only if no part of the range mapping on any GPUs. Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 256 +++++++++++++++++++++------ drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 8 +- 2 files changed, 213 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index fb22b857adbc..4e1af4b181ea 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -296,12 +296,12 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap) KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0); } - /* free dma_addr array for each gpu */ + /* free dma_addr array, bitmap_mapped for each gpu */ for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) { - if (prange->dma_addr[gpuidx]) { + if (prange->dma_addr[gpuidx]) kvfree(prange->dma_addr[gpuidx]); - prange->dma_addr[gpuidx] = NULL; - } + if (prange->bitmap_mapped[gpuidx]) + bitmap_free(prange->bitmap_mapped[gpuidx]); } mutex_destroy(&prange->lock); @@ -327,19 +327,38 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, uint64_t size = last - start + 1; struct svm_range *prange; struct kfd_process *p; - - prange = kzalloc(sizeof(*prange), GFP_KERNEL); - if (!prange) - return NULL; + unsigned int nbits; + uint32_t gpuidx; p = container_of(svms, struct kfd_process, svms); if (!p->xnack_enabled && update_mem_usage && amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT, KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0)) { pr_info("SVM mapping failed, exceeds resident system memory limit\n"); - kfree(prange); return NULL; } + + prange = kzalloc(sizeof(*prange), GFP_KERNEL); + if (!prange) + return NULL; + + svm_range_set_default_attributes(&prange->preferred_loc, + &prange->prefetch_loc, + &prange->granularity, &prange->flags); + + nbits = svm_range_mapped_nbits(start, last, prange->granularity); + pr_debug("prange 0x%p [0x%llx 0x%llx] bitmap_mapped nbits %d\n", prange, + start, last, nbits); + for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) { + prange->bitmap_mapped[gpuidx] = bitmap_zalloc(nbits, GFP_KERNEL); + if (!prange->bitmap_mapped[gpuidx]) { + while (gpuidx--) + bitmap_free(prange->bitmap_mapped[gpuidx]); + kfree(prange); + return NULL; + } + } + prange->npages = size; prange->svms = svms; prange->start = start; @@ -359,10 +378,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, bitmap_copy(prange->bitmap_access, svms->bitmap_supported, MAX_GPU_INSTANCE); - svm_range_set_default_attributes(&prange->preferred_loc, - &prange->prefetch_loc, - &prange->granularity, &prange->flags); - pr_debug("svms 0x%p [0x%llx 0x%llx]\n", svms, start, last); return prange; @@ -984,6 +999,47 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old, return 0; } +static int +svm_range_split_bitmap_mapped(struct svm_range *new, struct svm_range *old, + uint64_t start, uint64_t last) +{ + struct kfd_process *p = container_of(new->svms, struct kfd_process, svms); + unsigned int nbits, old_nbits, old_nbits2; + unsigned long *bits; + uint32_t gpuidx; + + nbits = svm_range_mapped_nbits(new->start, new->last, new->granularity); + old_nbits = svm_range_mapped_nbits(old->start, old->last, old->granularity); + old_nbits2 = svm_range_mapped_nbits(start, last, old->granularity); + + pr_debug("old 0x%p [0x%lx 0x%lx] => [0x%llx 0x%llx] nbits %d => %d\n", + old, old->start, old->last, start, last, old_nbits, old_nbits2); + pr_debug("new 0x%p [0x%lx 0x%lx] nbits %d\n", new, new->start, new->last, + nbits); + + for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) { + bits = bitmap_alloc(old_nbits2, GFP_KERNEL); + if (!bits) + return -ENOMEM; + + if (start == old->start) { + bitmap_shift_right(new->bitmap_mapped[gpuidx], + old->bitmap_mapped[gpuidx], + old_nbits2 - 1, old_nbits); + bitmap_copy(bits, old->bitmap_mapped[gpuidx], old_nbits2); + } else { + bitmap_copy(new->bitmap_mapped[gpuidx], + old->bitmap_mapped[gpuidx], nbits); + bitmap_shift_right(bits, old->bitmap_mapped[gpuidx], + nbits - 1, old_nbits); + } + bitmap_free(old->bitmap_mapped[gpuidx]); + old->bitmap_mapped[gpuidx] = bits; + } + + return 0; +} + /** * svm_range_split_adjust - split range and adjust * @@ -1024,6 +1080,10 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old, return r; } + r = svm_range_split_bitmap_mapped(new, old, start, last); + if (r) + return r; + old->npages = last - start + 1; old->start = start; old->last = last; @@ -1032,7 +1092,6 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old, new->prefetch_loc = old->prefetch_loc; new->actual_loc = old->actual_loc; new->granularity = old->granularity; - new->mapped_to_gpu = old->mapped_to_gpu; bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); @@ -1346,6 +1405,108 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, fence); } +/** + * svm_range_partial_mapped_dev - check if prange mapped on the specific GPU + * + * @gpuidx: the GPU index to check + * @prange: prange to check + * @start: the start address in pages + * @last: the last address in pages + * + * Return: + * true: if any part of the range within [start, last] mapped on the GPU + * false: if the entire range [start, last] not mapped on the GPU + */ +static bool +svm_range_partial_mapped_dev(uint32_t gpuidx, struct svm_range *prange, + unsigned long start, unsigned long last) +{ + unsigned long start_index, last_index; + + start = max(start, prange->start); + last = min(last, prange->last); + if (last < start) + return false; + + start_index = (start - prange->start) >> prange->granularity; + last_index = (last - prange->start) >> prange->granularity; + return find_next_bit(prange->bitmap_mapped[gpuidx], last_index + 1, + start_index) <= last_index; +} + +/** + * svm_range_partial_mapped - check if prange mapped on any GPU + * + * @prange: prange to check + * @start: the start address in pages + * @last: the last address in pages + * + * Return: + * true: if any part of prange mapped on any GPU currently + * false: if the entire range is not mapped on any GPU + */ +static bool +svm_range_partial_mapped(struct svm_range *prange, unsigned long start, + unsigned long last) +{ + struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); + struct svm_range *pchild; + uint32_t gpuidx; + + for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) { + list_for_each_entry(pchild, &prange->child_list, child_list) { + if (svm_range_partial_mapped_dev(gpuidx, pchild, start, last)) + return true; + } + + if (svm_range_partial_mapped_dev(gpuidx, prange, start, last)) + return true; + } + return false; +} + +/** + * svm_range_complete_mapped - check if prange mapped on all GPUs completely + * + * @prange: prange to check + * + * Return: + * true: if the entire prange mapped completely on all GPUs that need access + * otherwise return false + */ +static bool svm_range_complete_mapped(struct svm_range *prange) +{ + int nbits = svm_range_mapped_nbits(prange->start, prange->last, prange->granularity); + DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); + uint32_t gpuidx; + int r; + + r = svm_range_need_access_gpus(bitmap, prange); + if (r) + return false; + + for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) + if (!bitmap_full(prange->bitmap_mapped[gpuidx], nbits)) + return false; + return true; +} + +static void +svm_range_update_mapped(uint32_t gpuidx, struct svm_range *prange, + unsigned long start, unsigned long last, bool mapped) +{ + unsigned long index, nbits; + + index = (start - prange->start) >> prange->granularity; + nbits = svm_range_mapped_nbits(start, last, prange->granularity); + if (mapped) + bitmap_set(prange->bitmap_mapped[gpuidx], index, nbits); + else + bitmap_clear(prange->bitmap_mapped[gpuidx], index, nbits); + pr_debug("prange 0x%p [0x%lx 0x%lx] update mapped %d nbits %ld gpu %d\n", + prange, start, last, mapped, nbits, gpuidx); +} + static int svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, unsigned long last, uint32_t trigger) @@ -1357,29 +1518,28 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, uint32_t gpuidx; int r = 0; - if (!prange->mapped_to_gpu) { - pr_debug("prange 0x%p [0x%lx 0x%lx] not mapped to GPU\n", - prange, prange->start, prange->last); - return 0; - } - - if (prange->start == start && prange->last == last) { - pr_debug("unmap svms 0x%p prange 0x%p\n", prange->svms, prange); - prange->mapped_to_gpu = false; - } - bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip, MAX_GPU_INSTANCE); p = container_of(prange->svms, struct kfd_process, svms); for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { - pr_debug("unmap from gpu idx 0x%x\n", gpuidx); pdd = kfd_process_device_from_gpuidx(p, gpuidx); if (!pdd) { pr_debug("failed to find device idx %d\n", gpuidx); - return -EINVAL; + continue; } + if (!svm_range_partial_mapped_dev(gpuidx, prange, start, last)) { + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] not mapped on gpu %d\n", + prange->svms, prange, start, last, gpuidx); + continue; + } + + svm_range_update_mapped(gpuidx, prange, start, last, false); + + pr_debug("unmap svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu %d\n", + prange->svms, prange, start, last, gpuidx); + kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid, start, last, trigger); @@ -1519,6 +1679,10 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset, if (r) break; + if (!r) + svm_range_update_mapped(gpuidx, prange, prange->start + offset, + prange->start + offset + npages - 1, true); + if (fence) { r = dma_fence_wait(fence, false); dma_fence_put(fence); @@ -1670,7 +1834,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm, if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) { bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE); - if (!prange->mapped_to_gpu || + if (!svm_range_partial_mapped(prange, prange->start, prange->last) || bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) { r = 0; goto free_ctx; @@ -1755,9 +1919,6 @@ static int svm_range_validate_and_map(struct mm_struct *mm, r = svm_range_map_to_gpus(prange, offset, npages, readonly, ctx->bitmap, flush_tlb); - if (!r && next == end) - prange->mapped_to_gpu = true; - svm_range_unlock(prange); addr = next; @@ -1939,22 +2100,8 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, if (!p->xnack_enabled || (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) { int evicted_ranges; - bool mapped = prange->mapped_to_gpu; - - list_for_each_entry(pchild, &prange->child_list, child_list) { - if (!pchild->mapped_to_gpu) - continue; - mapped = true; - mutex_lock_nested(&pchild->lock, 1); - if (pchild->start <= last && pchild->last >= start) { - pr_debug("increment pchild invalid [0x%lx 0x%lx]\n", - pchild->start, pchild->last); - atomic_inc(&pchild->invalid); - } - mutex_unlock(&pchild->lock); - } - if (!mapped) + if (!svm_range_partial_mapped(prange, start, last)) return r; if (prange->start <= last && prange->last >= start) @@ -2005,7 +2152,10 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, static struct svm_range *svm_range_clone(struct svm_range *old) { + struct kfd_process *p = container_of(old->svms, struct kfd_process, svms); struct svm_range *new; + uint32_t gpuidx; + int nbits; new = svm_range_new(old->svms, old->start, old->last, false); if (!new) @@ -2027,8 +2177,13 @@ static struct svm_range *svm_range_clone(struct svm_range *old) new->prefetch_loc = old->prefetch_loc; new->actual_loc = old->actual_loc; new->granularity = old->granularity; - new->mapped_to_gpu = old->mapped_to_gpu; new->vram_pages = old->vram_pages; + nbits = svm_range_mapped_nbits(new->start, new->last, new->granularity); + for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) { + bitmap_copy(new->bitmap_mapped[gpuidx], + old->bitmap_mapped[gpuidx], + nbits); + }; bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); @@ -2147,7 +2302,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, next_start = min(node->last, last) + 1; if (svm_range_is_same_attrs(p, prange, nattr, attrs) && - prange->mapped_to_gpu) { + svm_range_complete_mapped(prange)) { /* nothing to do */ } else if (node->start < start || node->last > last) { /* node intersects the update range and its attributes @@ -3641,7 +3796,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, if (migrated && (!p->xnack_enabled || (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) && - prange->mapped_to_gpu) { + svm_range_partial_mapped(prange, prange->start, prange->last)) { pr_debug("restore_work will update mappings of GPUs\n"); mutex_unlock(&prange->migrate_mutex); continue; @@ -3652,7 +3807,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, continue; } - flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu; + flush_tlb = !migrated && update_mapping && + svm_range_partial_mapped(prange, prange->start, prange->last); r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, true, flush_tlb); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index bf2fde008115..7e165854bc0e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -136,7 +136,7 @@ struct svm_range { struct list_head child_list; DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE); DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE); - bool mapped_to_gpu; + unsigned long *bitmap_mapped[MAX_GPU_INSTANCE]; }; static inline void svm_range_lock(struct svm_range *prange) @@ -167,6 +167,12 @@ static inline struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo) return NULL; } +static inline int svm_range_mapped_nbits(uint64_t start, uint64_t last, + uint8_t granularity) +{ + return (last >> granularity) - (start >> granularity) + 1; +} + int svm_range_list_init(struct kfd_process *p); void svm_range_list_fini(struct kfd_process *p); int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start, -- 2.35.1