If amdgpu.vm_update_context param is set to use CPU, then Page Directories will be updated by CPU instead of SDMA Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 151 ++++++++++++++++++++++++--------- 1 file changed, 109 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9c89cb2..d72a624 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -271,6 +271,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, uint64_t saddr, uint64_t eaddr, unsigned level) { + u64 flags; unsigned shift = (adev->vm_manager.num_level - level) * adev->vm_manager.block_size; unsigned pt_idx, from, to; @@ -299,6 +300,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, saddr = saddr & ((1 << shift) - 1); eaddr = eaddr & ((1 << shift) - 1); + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_VRAM_CLEARED; + if (vm->use_cpu_for_update) + flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + else + flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | + AMDGPU_GEM_CREATE_SHADOW); + /* walk over the address space and allocate the page tables */ for (pt_idx = from; pt_idx <= to; ++pt_idx) { struct reservation_object *resv = vm->root.bo->tbo.resv; @@ -310,10 +319,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, amdgpu_vm_bo_size(adev, level), AMDGPU_GPU_PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_SHADOW | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED, + flags, NULL, resv, &pt); if (r) return r; @@ -952,6 +958,43 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) return result; } +/** + * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU + * + * @params: see amdgpu_pte_update_params definition + * @pe: kmap addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: hw access flags + */ +static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, + uint64_t pe, uint64_t addr, + unsigned count, uint32_t incr, + uint64_t flags) +{ + unsigned int i; + + for (i = 0; i < count; i++) { + amdgpu_gart_set_pte_pde(params->adev, (void *)pe, + i, addr, flags); + addr += incr; + } + + mb(); + amdgpu_gart_flush_gpu_tlb(params->adev, 0); +} + +static void amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct amdgpu_bo *bo) +{ + struct amdgpu_sync sync; + + amdgpu_sync_create(&sync); + amdgpu_sync_resv(adev, &sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_VM); + amdgpu_sync_wait(&sync); + amdgpu_sync_free(&sync); +} + /* * amdgpu_vm_update_level - update a single level in the hierarchy * @@ -981,34 +1024,50 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, if (!parent->entries) return 0; - ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); - /* padding, etc. */ - ndw = 64; + memset(¶ms, 0, sizeof(params)); + params.adev = adev; + shadow = parent->bo->shadow; - /* assume the worst case */ - ndw += parent->last_entry_used * 6; + WARN_ON(vm->use_cpu_for_update && shadow); + if (vm->use_cpu_for_update && !shadow) { + r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr); + if (r) + return r; + amdgpu_vm_bo_wait(adev, parent->bo); + params.func = amdgpu_vm_cpu_set_ptes; + } else { + if (shadow) { + r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); + if (r) + return r; + } + ring = container_of(vm->entity.sched, struct amdgpu_ring, + sched); - pd_addr = amdgpu_bo_gpu_offset(parent->bo); + /* padding, etc. */ + ndw = 64; - shadow = parent->bo->shadow; - if (shadow) { - r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); + /* assume the worst case */ + ndw += parent->last_entry_used * 6; + + pd_addr = amdgpu_bo_gpu_offset(parent->bo); + + if (shadow) { + shadow_addr = amdgpu_bo_gpu_offset(shadow); + ndw *= 2; + } else { + shadow_addr = 0; + } + + r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); if (r) return r; - shadow_addr = amdgpu_bo_gpu_offset(shadow); - ndw *= 2; - } else { - shadow_addr = 0; - } - r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); - if (r) - return r; + params.ib = &job->ibs[0]; + params.func = amdgpu_vm_do_set_ptes; + } - memset(¶ms, 0, sizeof(params)); - params.adev = adev; - params.ib = &job->ibs[0]; /* walk over the address space and update the directory */ for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { @@ -1043,15 +1102,15 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, amdgpu_vm_adjust_mc_addr(adev, last_pt); if (shadow) - amdgpu_vm_do_set_ptes(¶ms, - last_shadow, - pt_addr, count, - incr, - AMDGPU_PTE_VALID); - - amdgpu_vm_do_set_ptes(¶ms, last_pde, - pt_addr, count, incr, - AMDGPU_PTE_VALID); + params.func(¶ms, + last_shadow, + pt_addr, count, + incr, + AMDGPU_PTE_VALID); + + params.func(¶ms, last_pde, + pt_addr, count, incr, + AMDGPU_PTE_VALID); } count = 1; @@ -1067,14 +1126,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt); if (vm->root.bo->shadow) - amdgpu_vm_do_set_ptes(¶ms, last_shadow, pt_addr, - count, incr, AMDGPU_PTE_VALID); + params.func(¶ms, last_shadow, pt_addr, + count, incr, AMDGPU_PTE_VALID); - amdgpu_vm_do_set_ptes(¶ms, last_pde, pt_addr, - count, incr, AMDGPU_PTE_VALID); + params.func(¶ms, last_pde, pt_addr, + count, incr, AMDGPU_PTE_VALID); } - if (params.ib->length_dw == 0) { + if (params.func == amdgpu_vm_cpu_set_ptes) + amdgpu_bo_kunmap(parent->bo); + else if (params.ib->length_dw == 0) { amdgpu_job_free(job); } else { amdgpu_ring_pad_ib(ring, params.ib); @@ -2309,6 +2370,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_ring *ring; struct amd_sched_rq *rq; int r, i; + u64 flags; vm->va = RB_ROOT; vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); @@ -2342,12 +2404,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, "CPU update of VM recommended only for large BAR system\n"); vm->last_dir_update = NULL; + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_VRAM_CLEARED; + if (vm->use_cpu_for_update) + flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + else + flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | + AMDGPU_GEM_CREATE_SHADOW); + r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_SHADOW | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED, + flags, NULL, NULL, &vm->root.bo); if (r) goto error_free_sched_entity; -- 1.9.1