Add helper function xe_bind_svm_range to bind an SVM range to the GPU. A temporary xe_vma is created locally to reuse the existing page table update functions, which are vma-based. The SVM page table update lock design is different from that of userptr and bo page table updates. An xe_pt_svm_pre_commit function is introduced for SVM range pre-commit. An hmm_range pointer is added to the xe_vma struct. v1: Make the userptr member the last member of the xe_vma struct. Signed-off-by: Oak Zeng <oak.zeng@xxxxxxxxx> Cc: Niranjana Vishwanathapura <niranjana.vishwanathapura@xxxxxxxxx> Cc: Matthew Brost <matthew.brost@xxxxxxxxx> Cc: Thomas Hellström <thomas.hellstrom@xxxxxxxxx> Cc: Brian Welty <brian.welty@xxxxxxxxx> --- drivers/gpu/drm/xe/xe_pt.c | 114 +++++++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_pt.h | 4 ++ drivers/gpu/drm/xe/xe_vm_types.h | 13 +++- 3 files changed, 126 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index de1030a47588..f1e479fa3001 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -17,6 +17,7 @@ #include "xe_trace.h" #include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" +#include "xe_svm.h" struct xe_pt_dir { struct xe_pt pt; @@ -582,8 +583,15 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, { struct xe_device *xe = tile_to_xe(tile); struct xe_bo *bo = xe_vma_bo(vma); - bool is_devmem = !xe_vma_is_userptr(vma) && bo && - (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); + /* + * FIXME: Right now assume all svm ranges bound to GPU is backed + * by device memory. This assumption will change once migration + * policy is implemented. A svm range's backing store can be a + * mixture of device memory and system memory, page by page based. + * We probably need a separate stage_bind function for svm. 
+ */ + bool is_devmem = vma->svm_sg || (!xe_vma_is_userptr(vma) && bo && + (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))); struct xe_res_cursor curs; struct xe_pt_stage_bind_walk xe_walk = { .base = { @@ -617,7 +625,10 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, xe_bo_assert_held(bo); if (!xe_vma_is_null(vma)) { - if (xe_vma_is_userptr(vma)) + if (vma->svm_sg) + xe_res_first_sg(vma->svm_sg, 0, xe_vma_size(vma), + &curs); + else if (xe_vma_is_userptr(vma)) xe_res_first_sg(vma->userptr.sg, 0, xe_vma_size(vma), &curs); else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) @@ -1046,6 +1057,28 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) return 0; } +static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update) +{ + struct xe_vma *vma = pt_update->vma; + struct hmm_range *range = vma->hmm_range; + + if (mmu_interval_read_retry(range->notifier, + range->notifier_seq)) { + /* + * FIXME: is this really necessary? We didn't update GPU + * page table yet... + */ + xe_vm_invalidate_vma(vma); + return -EAGAIN; + } + return 0; +} + +static const struct xe_migrate_pt_update_ops svm_bind_ops = { + .populate = xe_vm_populate_pgtable, + .pre_commit = xe_pt_svm_pre_commit, +}; + static const struct xe_migrate_pt_update_ops bind_ops = { .populate = xe_vm_populate_pgtable, .pre_commit = xe_pt_pre_commit, @@ -1197,7 +1230,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; struct xe_pt_migrate_pt_update bind_pt_update = { .base = { - .ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops, + .ops = vma->svm_sg ? &svm_bind_ops : + (xe_vma_is_userptr(vma) ? 
&userptr_bind_ops : &bind_ops), .vma = vma, .tile_id = tile->id, }, @@ -1651,3 +1685,75 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu return fence; } + +/** + * xe_bind_svm_range() - bind an address range to vm + * + * @vm: the vm to bind this address range + * @tile: the tile to bind this address range to + * @range: a hmm_range which includes all the information + * needed for binding: virtual address range and physical + * pfns to back up this virtual address range. + * @flags: the binding flags to set in pte + * + * This is a helper function used by svm sub-system + * to bind a svm range to gpu vm. svm sub-system + * doesn't have xe_vma, thus helpers such as + * __xe_pt_bind_vma can't be used directly. So this + * helper is written for svm sub-system to use. + * + * This is a synchronous function. When this function + * returns, either the svm range is bound to GPU, or + * error happened. + * + * Return: 0 for success or error code for failure + * If -EAGAIN returns, it means mmu notifier was called ( + * aka there was concurrent cpu page table update) during + * this function, caller has to retry hmm_range_fault + */ +int xe_bind_svm_range(struct xe_vm *vm, struct xe_tile *tile, + struct hmm_range *range, u64 flags) +{ + struct dma_fence *fence = NULL; + struct xe_svm *svm = vm->svm; + int ret = 0; + /* + * Create a temp vma to reuse page table helpers such as + * __xe_pt_bind_vma + */ + struct xe_vma vma = { + .gpuva = { + .va = { + .addr = range->start, + .range = range->end - range->start + 1, + }, + .vm = &vm->gpuvm, + .flags = flags, + }, + .tile_mask = 0x1 << tile->id, + .hmm_range = range, + }; + + xe_svm_build_sg(range, &vma.svm_sgt); + vma.svm_sg = &vma.svm_sgt; + + mutex_lock(&svm->mutex); + if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) { + ret = -EAGAIN; + goto unlock; + } + xe_vm_lock(vm, true); + fence = __xe_pt_bind_vma(tile, &vma, vm->q[tile->id], NULL, 0, false); + xe_vm_unlock(vm); + 
+unlock: + mutex_unlock(&svm->mutex); + sg_free_table(vma.svm_sg); + + if (IS_ERR(fence)) + return PTR_ERR(fence); + + dma_fence_wait(fence, false); + dma_fence_put(fence); + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h index 71a4fbfcff43..775d08707466 100644 --- a/drivers/gpu/drm/xe/xe_pt.h +++ b/drivers/gpu/drm/xe/xe_pt.h @@ -17,6 +17,8 @@ struct xe_sync_entry; struct xe_tile; struct xe_vm; struct xe_vma; +struct xe_svm; +struct hmm_range; /* Largest huge pte is currently 1GiB. May become device dependent. */ #define MAX_HUGEPTE_LEVEL 2 @@ -45,4 +47,6 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma); +int xe_bind_svm_range(struct xe_vm *vm, struct xe_tile *tile, + struct hmm_range *range, u64 flags); #endif diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 037fb7168c63..68c7484b2110 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -21,6 +21,7 @@ struct xe_svm; struct xe_bo; struct xe_sync_entry; struct xe_vm; +struct hmm_range; #define TEST_VM_ASYNC_OPS_ERROR #define FORCE_ASYNC_OP_ERROR BIT(31) @@ -107,9 +108,19 @@ struct xe_vma { */ u16 pat_index; + /** + * @svm_sgt: a scatter gather table to save svm virtual address range's + * pfns + */ + struct sg_table svm_sgt; + struct sg_table *svm_sg; + /** hmm range of this pt update, used by svm */ + struct hmm_range *hmm_range; + /** * @userptr: user pointer state, only allocated for VMAs that are - * user pointers + * user pointers. When you add new members to xe_vma struct, userptr + * has to be the last member, xe_vma_create assumes this. */ struct xe_userptr userptr; }; -- 2.26.3