On 15.05.2017 at 23:32, Harish Kasiviswanathan wrote:
> If amdgpu.vm_update_context param is set to use CPU, then Page
> Directories will be updated by CPU instead of SDMA
>
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 151 ++++++++++++++++++++++++---------
>  1 file changed, 109 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9c89cb2..d72a624 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -271,6 +271,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>                                    uint64_t saddr, uint64_t eaddr,
>                                    unsigned level)
>  {
> +        u64 flags;

Reversed tree order, e.g. put it at the end of the variables.
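For illustration only, the ordering being asked for would move the new, short declaration to the end of the local-variable block, with the longer, initialized declarations first. A rough sketch, showing just the locals visible in the quoted hunk (any other locals the real function declares are omitted):

        unsigned shift = (adev->vm_manager.num_level - level) *
                adev->vm_manager.block_size;
        unsigned pt_idx, from, to;
        u64 flags;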
>          unsigned shift = (adev->vm_manager.num_level - level) *
>                  adev->vm_manager.block_size;
>          unsigned pt_idx, from, to;
> @@ -299,6 +300,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>          saddr = saddr & ((1 << shift) - 1);
>          eaddr = eaddr & ((1 << shift) - 1);
>
> +        flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> +                        AMDGPU_GEM_CREATE_VRAM_CLEARED;
> +        if (vm->use_cpu_for_update)
> +                flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> +        else
> +                flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> +                                AMDGPU_GEM_CREATE_SHADOW);
> +
>          /* walk over the address space and allocate the page tables */
>          for (pt_idx = from; pt_idx <= to; ++pt_idx) {
>                  struct reservation_object *resv = vm->root.bo->tbo.resv;
> @@ -310,10 +319,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>                                       amdgpu_vm_bo_size(adev, level),
>                                       AMDGPU_GPU_PAGE_SIZE, true,
>                                       AMDGPU_GEM_DOMAIN_VRAM,
> -                                     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> -                                     AMDGPU_GEM_CREATE_SHADOW |
> -                                     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> -                                     AMDGPU_GEM_CREATE_VRAM_CLEARED,
> +                                     flags,
>                                       NULL, resv, &pt);
>                  if (r)
>                          return r;
> @@ -952,6 +958,43 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
>          return result;
>  }
>
> +/**
> + * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
> + *
> + * @params: see amdgpu_pte_update_params definition
> + * @pe: kmap addr of the page entry
> + * @addr: dst addr to write into pe
> + * @count: number of page entries to update
> + * @incr: increase next addr by incr bytes
> + * @flags: hw access flags
> + */
> +static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
> +                                   uint64_t pe, uint64_t addr,
> +                                   unsigned count, uint32_t incr,
> +                                   uint64_t flags)
> +{
> +        unsigned int i;
> +
> +        for (i = 0; i < count; i++) {
> +                amdgpu_gart_set_pte_pde(params->adev, (void *)pe,
> +                                        i, addr, flags);
> +                addr += incr;
> +        }
> +
> +        mb();
> +        amdgpu_gart_flush_gpu_tlb(params->adev, 0);
> +}
> +
> +static void amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct amdgpu_bo *bo)

A comment what the function does would be nice here.

With those two nit picks fixed the patch is Reviewed-by: Christian König <christian.koenig at amd.com>.

Regards,
Christian.

> +{
> +        struct amdgpu_sync sync;
> +
> +        amdgpu_sync_create(&sync);
> +        amdgpu_sync_resv(adev, &sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_VM);
> +        amdgpu_sync_wait(&sync);
> +        amdgpu_sync_free(&sync);
> +}
> +
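As an illustration of the requested comment, a kernel-doc block in the style the patch already uses for amdgpu_vm_cpu_set_ptes might read roughly as follows (an editor's sketch based on what the function visibly does, not wording from the patch author):

/**
 * amdgpu_vm_bo_wait - wait for all fences on a BO's reservation object
 *
 * @adev: amdgpu_device pointer
 * @bo: buffer object to wait for
 *
 * Blocks until previously scheduled GPU work on @bo has completed, so that
 * the CPU can safely write the page directory directly.
 */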
>  /*
>   * amdgpu_vm_update_level - update a single level in the hierarchy
>   *
> @@ -981,34 +1024,50 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>
>          if (!parent->entries)
>                  return 0;
> -        ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
>
> -        /* padding, etc. */
> -        ndw = 64;
> +        memset(&params, 0, sizeof(params));
> +        params.adev = adev;
> +        shadow = parent->bo->shadow;
>
> -        /* assume the worst case */
> -        ndw += parent->last_entry_used * 6;
> +        WARN_ON(vm->use_cpu_for_update && shadow);
> +        if (vm->use_cpu_for_update && !shadow) {
> +                r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
> +                if (r)
> +                        return r;
> +                amdgpu_vm_bo_wait(adev, parent->bo);
> +                params.func = amdgpu_vm_cpu_set_ptes;
> +        } else {
> +                if (shadow) {
> +                        r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
> +                        if (r)
> +                                return r;
> +                }
> +                ring = container_of(vm->entity.sched, struct amdgpu_ring,
> +                                    sched);
>
> -        pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> +                /* padding, etc. */
> +                ndw = 64;
>
> -        shadow = parent->bo->shadow;
> -        if (shadow) {
> -                r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
> +                /* assume the worst case */
> +                ndw += parent->last_entry_used * 6;
> +
> +                pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> +
> +                if (shadow) {
> +                        shadow_addr = amdgpu_bo_gpu_offset(shadow);
> +                        ndw *= 2;
> +                } else {
> +                        shadow_addr = 0;
> +                }
> +
> +                r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
>                  if (r)
>                          return r;
> -                shadow_addr = amdgpu_bo_gpu_offset(shadow);
> -                ndw *= 2;
> -        } else {
> -                shadow_addr = 0;
> -        }
>
> -        r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
> -        if (r)
> -                return r;
> +                params.ib = &job->ibs[0];
> +                params.func = amdgpu_vm_do_set_ptes;
> +        }
>
> -        memset(&params, 0, sizeof(params));
> -        params.adev = adev;
> -        params.ib = &job->ibs[0];
>
>          /* walk over the address space and update the directory */
>          for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
> @@ -1043,15 +1102,15 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>                                  amdgpu_vm_adjust_mc_addr(adev, last_pt);
>
>                          if (shadow)
> -                                amdgpu_vm_do_set_ptes(&params,
> -                                                      last_shadow,
> -                                                      pt_addr, count,
> -                                                      incr,
> -                                                      AMDGPU_PTE_VALID);
> -
> -                        amdgpu_vm_do_set_ptes(&params, last_pde,
> -                                              pt_addr, count, incr,
> -                                              AMDGPU_PTE_VALID);
> +                                params.func(&params,
> +                                            last_shadow,
> +                                            pt_addr, count,
> +                                            incr,
> +                                            AMDGPU_PTE_VALID);
> +
> +                        params.func(&params, last_pde,
> +                                    pt_addr, count, incr,
> +                                    AMDGPU_PTE_VALID);
>                  }
>
>                  count = 1;
> @@ -1067,14 +1126,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>                  uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt);
>
>                  if (vm->root.bo->shadow)
> -                        amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr,
> -                                              count, incr, AMDGPU_PTE_VALID);
> +                        params.func(&params, last_shadow, pt_addr,
> +                                    count, incr, AMDGPU_PTE_VALID);
>
> -                amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr,
> -                                      count, incr, AMDGPU_PTE_VALID);
> +                params.func(&params, last_pde, pt_addr,
> +                            count, incr, AMDGPU_PTE_VALID);
>          }
>
> -        if (params.ib->length_dw == 0) {
> +        if (params.func == amdgpu_vm_cpu_set_ptes)
> +                amdgpu_bo_kunmap(parent->bo);
> +        else if (params.ib->length_dw == 0) {
>                  amdgpu_job_free(job);
>          } else {
>                  amdgpu_ring_pad_ib(ring, params.ib);
> @@ -2309,6 +2370,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>          struct amdgpu_ring *ring;
>          struct amd_sched_rq *rq;
>          int r, i;
> +        u64 flags;
>
>          vm->va = RB_ROOT;
>          vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
> @@ -2342,12 +2404,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>                    "CPU update of VM recommended only for large BAR system\n");
>          vm->last_dir_update = NULL;
>
> +        flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> +                        AMDGPU_GEM_CREATE_VRAM_CLEARED;
> +        if (vm->use_cpu_for_update)
> +                flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> +        else
> +                flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> +                                AMDGPU_GEM_CREATE_SHADOW);
> +
>          r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
>                               AMDGPU_GEM_DOMAIN_VRAM,
> -                             AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> -                             AMDGPU_GEM_CREATE_SHADOW |
> -                             AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> -                             AMDGPU_GEM_CREATE_VRAM_CLEARED,
> +                             flags,
>                               NULL, NULL, &vm->root.bo);
>          if (r)
>                  goto error_free_sched_entity;
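Stepping back, the core of the patch is that amdgpu_vm_update_level() binds params.func once, either to the new CPU helper or to the existing SDMA path, and every write site then goes through that pointer. Below is a minimal, self-contained sketch of that dispatch pattern; the names are made up for illustration and are not amdgpu APIs.

#include <stdint.h>
#include <stdio.h>

struct pte_update_params;

typedef void (*set_ptes_fn)(struct pte_update_params *p, uint64_t pe,
                            uint64_t addr, unsigned count, uint32_t incr);

struct pte_update_params {
        set_ptes_fn func;       /* bound once, used at every write site */
};

/* Stand-in for a CPU backend that would write entries through a mapping. */
static void cpu_set_ptes(struct pte_update_params *p, uint64_t pe,
                         uint64_t addr, unsigned count, uint32_t incr)
{
        (void)p;
        printf("CPU:  %u entries, pe=0x%llx addr=0x%llx incr=%u\n",
               count, (unsigned long long)pe, (unsigned long long)addr, incr);
}

/* Stand-in for an SDMA backend that would emit commands into an IB instead. */
static void sdma_set_ptes(struct pte_update_params *p, uint64_t pe,
                          uint64_t addr, unsigned count, uint32_t incr)
{
        (void)p;
        printf("SDMA: %u entries, pe=0x%llx addr=0x%llx incr=%u\n",
               count, (unsigned long long)pe, (unsigned long long)addr, incr);
}

int main(void)
{
        struct pte_update_params params;
        int use_cpu_for_update = 1;     /* stands in for the module parameter */

        params.func = use_cpu_for_update ? cpu_set_ptes : sdma_set_ptes;

        /* The call site is identical no matter which backend was chosen. */
        params.func(&params, 0x1000, 0x200000, 8, 8);
        return 0;
}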