On 2017-05-17 05:02, Kasiviswanathan, Harish wrote:
>
> -----Original Message-----
> From: Zhou, David(ChunMing)
> Sent: Monday, May 15, 2017 10:50 PM
> To: Kasiviswanathan, Harish <Harish.Kasiviswanathan at amd.com>; amd-gfx at lists.freedesktop.org
> Subject: Re: [PATCH 4/5] drm/amdgpu: Support page directory update via CPU
>
> On 2017-05-16 05:32, Harish Kasiviswanathan wrote:
> > If the amdgpu.vm_update_context param is set to use the CPU, then page
> > directories will be updated by the CPU instead of the SDMA.
> >
> > Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan at amd.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 151 ++++++++++++++++++++++++---------
> >  1 file changed, 109 insertions(+), 42 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > index 9c89cb2..d72a624 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > @@ -271,6 +271,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
> >  				  uint64_t saddr, uint64_t eaddr,
> >  				  unsigned level)
> >  {
> > +	u64 flags;
> >  	unsigned shift = (adev->vm_manager.num_level - level) *
> >  		adev->vm_manager.block_size;
> >  	unsigned pt_idx, from, to;
> > @@ -299,6 +300,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
> >  	saddr = saddr & ((1 << shift) - 1);
> >  	eaddr = eaddr & ((1 << shift) - 1);
> >
> > +	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> > +		AMDGPU_GEM_CREATE_VRAM_CLEARED;
> > +	if (vm->use_cpu_for_update)
> > +		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> I think the shadow flag is needed for the CPU case as well; it is used
> to back up the VM BO and is meaningful on GPU reset.
> Same comment for the PD BO.
>
> [HK]: Yes, support for shadow BOs is desirable and could be implemented
> as a separate commit. To support shadow BOs, the caller should
> explicitly add the shadow BOs into ttm_eu_reserve_buffer(..) to remove
> them from the TTM swap list, or ttm_bo_kmap has to be modified. This
> implementation of CPU updates of VM page tables is mainly for KFD
> usage; graphics will use it only for experimental and testing purposes.
> From KFD's viewpoint shadow BOs are not useful, because if the GPU is
> reset all queue information is lost (submissions are done by user
> space) and it is not possible to recover.
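For reference, a minimal sketch of the reservation change [HK] describes
above: the shadow BO is added to the validation list next to its parent,
so ttm_eu_reserve_buffer() reserves both and the shadow cannot be swapped
out while it is kmapped. Untested, written against the 4.12-era TTM
structures; amdgpu_vm_add_bo_and_shadow is a hypothetical helper, not
part of this patch:

static void amdgpu_vm_add_bo_and_shadow(struct amdgpu_bo *bo,
					struct ttm_validate_buffer *tv,
					struct ttm_validate_buffer *tv_shadow,
					struct list_head *validated)
{
	/* reserve the real BO as usual */
	tv->bo = &bo->tbo;
	tv->shared = true;
	list_add(&tv->head, validated);

	/* and, when present, its shadow, so that
	 * ttm_eu_reserve_buffer() takes it off the swap LRU too */
	if (bo->shadow) {
		tv_shadow->bo = &bo->shadow->tbo;
		tv_shadow->shared = true;
		list_add(&tv_shadow->head, validated);
	}
}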
Either way is fine with me.

David Zhou

>
> Regards,
> David Zhou
> > +	else
> > +		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> > +				AMDGPU_GEM_CREATE_SHADOW);
> > +
> >  	/* walk over the address space and allocate the page tables */
> >  	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
> >  		struct reservation_object *resv = vm->root.bo->tbo.resv;
> > @@ -310,10 +319,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
> >  					     amdgpu_vm_bo_size(adev, level),
> >  					     AMDGPU_GPU_PAGE_SIZE, true,
> >  					     AMDGPU_GEM_DOMAIN_VRAM,
> > -					     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> > -					     AMDGPU_GEM_CREATE_SHADOW |
> > -					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> > -					     AMDGPU_GEM_CREATE_VRAM_CLEARED,
> > +					     flags,
> >  					     NULL, resv, &pt);
> >  		if (r)
> >  			return r;
> > @@ -952,6 +958,43 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
> >  	return result;
> >  }
> >
> > +/**
> > + * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
> > + *
> > + * @params: see amdgpu_pte_update_params definition
> > + * @pe: kmap addr of the page entry
> > + * @addr: dst addr to write into pe
> > + * @count: number of page entries to update
> > + * @incr: increase next addr by incr bytes
> > + * @flags: hw access flags
> > + */
> > +static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
> > +				   uint64_t pe, uint64_t addr,
> > +				   unsigned count, uint32_t incr,
> > +				   uint64_t flags)
> > +{
> > +	unsigned int i;
> > +
> > +	for (i = 0; i < count; i++) {
> > +		amdgpu_gart_set_pte_pde(params->adev, (void *)pe,
> > +					i, addr, flags);
> > +		addr += incr;
> > +	}
> > +
> > +	mb();
> > +	amdgpu_gart_flush_gpu_tlb(params->adev, 0);
> > +}
> > +
> > +static void amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct amdgpu_bo *bo)
> > +{
> > +	struct amdgpu_sync sync;
> > +
> > +	amdgpu_sync_create(&sync);
> > +	amdgpu_sync_resv(adev, &sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_VM);
> > +	amdgpu_sync_wait(&sync);
> > +	amdgpu_sync_free(&sync);
> > +}
> > +
> >  /*
> >   * amdgpu_vm_update_level - update a single level in the hierarchy
> >   *
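The two new helpers above carry the CPU path: amdgpu_vm_bo_wait() drains
every fence on the BO's reservation object, after which the entries can
be written directly through a kernel mapping. As an untested
illustration of how they combine in the hunk below (all names are from
the patch; addr, count and incr stand for values the real caller derives
while walking the directory):

	uint64_t pd_addr;

	/* map the page directory, then wait for in-flight GPU work on
	 * it before any CPU write */
	r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
	if (r)
		return r;
	amdgpu_vm_bo_wait(adev, parent->bo);

	/* write 'count' entries at the mapped address; the helper ends
	 * with a memory barrier and a GPU TLB flush */
	amdgpu_vm_cpu_set_ptes(&params, pd_addr, addr, count, incr,
			       AMDGPU_PTE_VALID);
	amdgpu_bo_kunmap(parent->bo);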
> > @@ -981,34 +1024,50 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
> >
> >  	if (!parent->entries)
> >  		return 0;
> > -	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
> >
> > -	/* padding, etc. */
> > -	ndw = 64;
> > +	memset(&params, 0, sizeof(params));
> > +	params.adev = adev;
> > +	shadow = parent->bo->shadow;
> >
> > -	/* assume the worst case */
> > -	ndw += parent->last_entry_used * 6;
> > +	WARN_ON(vm->use_cpu_for_update && shadow);
> > +	if (vm->use_cpu_for_update && !shadow) {
> > +		r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
> > +		if (r)
> > +			return r;
> > +		amdgpu_vm_bo_wait(adev, parent->bo);
> > +		params.func = amdgpu_vm_cpu_set_ptes;
> > +	} else {
> > +		if (shadow) {
> > +			r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
> > +			if (r)
> > +				return r;
> > +		}
> > +		ring = container_of(vm->entity.sched, struct amdgpu_ring,
> > +				    sched);
> >
> > -	pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> > +		/* padding, etc. */
> > +		ndw = 64;
> >
> > -	shadow = parent->bo->shadow;
> > -	if (shadow) {
> > -		r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
> > +		/* assume the worst case */
> > +		ndw += parent->last_entry_used * 6;
> > +
> > +		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> > +
> > +		if (shadow) {
> > +			shadow_addr = amdgpu_bo_gpu_offset(shadow);
> > +			ndw *= 2;
> > +		} else {
> > +			shadow_addr = 0;
> > +		}
> > +
> > +		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
> >  		if (r)
> >  			return r;
> > -		shadow_addr = amdgpu_bo_gpu_offset(shadow);
> > -		ndw *= 2;
> > -	} else {
> > -		shadow_addr = 0;
> > -	}
> >
> > -	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
> > -	if (r)
> > -		return r;
> > +		params.ib = &job->ibs[0];
> > +		params.func = amdgpu_vm_do_set_ptes;
> > +	}
> >
> > -	memset(&params, 0, sizeof(params));
> > -	params.adev = adev;
> > -	params.ib = &job->ibs[0];
> >
> >  	/* walk over the address space and update the directory */
> >  	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
> > @@ -1043,15 +1102,15 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
> >  			amdgpu_vm_adjust_mc_addr(adev, last_pt);
> >
> >  			if (shadow)
> > -				amdgpu_vm_do_set_ptes(&params,
> > -						      last_shadow,
> > -						      pt_addr, count,
> > -						      incr,
> > -						      AMDGPU_PTE_VALID);
> > -
> > -			amdgpu_vm_do_set_ptes(&params, last_pde,
> > -					      pt_addr, count, incr,
> > -					      AMDGPU_PTE_VALID);
> > +				params.func(&params,
> > +					    last_shadow,
> > +					    pt_addr, count,
> > +					    incr,
> > +					    AMDGPU_PTE_VALID);
> > +
> > +			params.func(&params, last_pde,
> > +				    pt_addr, count, incr,
> > +				    AMDGPU_PTE_VALID);
> >  		}
> >
> >  		count = 1;
> > @@ -1067,14 +1126,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
> >  		uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt);
> >
> >  		if (vm->root.bo->shadow)
> > -			amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr,
> > -					      count, incr, AMDGPU_PTE_VALID);
> > +			params.func(&params, last_shadow, pt_addr,
> > +				    count, incr, AMDGPU_PTE_VALID);
> >
> > -		amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr,
> > -				      count, incr, AMDGPU_PTE_VALID);
> > +		params.func(&params, last_pde, pt_addr,
> > +			    count, incr, AMDGPU_PTE_VALID);
> >  	}
> >
> > -	if (params.ib->length_dw == 0) {
> > +	if (params.func == amdgpu_vm_cpu_set_ptes)
> > +		amdgpu_bo_kunmap(parent->bo);
> > +	else if (params.ib->length_dw == 0) {
> >  		amdgpu_job_free(job);
> >  	} else {
> >  		amdgpu_ring_pad_ib(ring, params.ib);
> > @@ -2309,6 +2370,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> >  	struct amdgpu_ring *ring;
> >  	struct amd_sched_rq *rq;
> >  	int r, i;
> > +	u64 flags;
> >
> >  	vm->va = RB_ROOT;
> >  	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
> > @@ -2342,12 +2404,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> >  			  "CPU update of VM recommended only for large BAR system\n");
> >  	vm->last_dir_update = NULL;
> >
> > +	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> > +			AMDGPU_GEM_CREATE_VRAM_CLEARED;
> > +	if (vm->use_cpu_for_update)
> > +		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> > +	else
> > +		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> > +				AMDGPU_GEM_CREATE_SHADOW);
> > +
> >  	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
> >  			     AMDGPU_GEM_DOMAIN_VRAM,
> > -			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> > -			     AMDGPU_GEM_CREATE_SHADOW |
> > -			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> > -			     AMDGPU_GEM_CREATE_VRAM_CLEARED,
> > +			     flags,
> >  			     NULL, NULL, &vm->root.bo);
> >  	if (r)
> >  		goto error_free_sched_entity;
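One observation on the patch as a whole: the BO-creation flag selection
is now duplicated between amdgpu_vm_alloc_levels() and amdgpu_vm_init().
A helper along these lines (hypothetical and untested, not part of the
patch) would keep the two call sites in sync:

static u64 amdgpu_vm_bo_create_flags(const struct amdgpu_vm *vm)
{
	u64 flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
		    AMDGPU_GEM_CREATE_VRAM_CLEARED;

	if (vm->use_cpu_for_update)
		/* CPU updates need a CPU-accessible, kmap-able BO */
		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	else
		/* SDMA updates keep a shadow copy for GPU-reset recovery */
		flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
			 AMDGPU_GEM_CREATE_SHADOW;

	return flags;
}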