On 2019-03-25 7:38 a.m., Christian König wrote: > Am 20.03.19 um 12:57 schrieb Kuehling, Felix: >> As far as I can tell, the whole series is a small cleanup and big >> refactor to enable CPU clearing of PTs without a lot of ugliness or code >> duplication. > > It's a bit more than that. Key point is that I can now easily add a > parameter for direct submission during page fault handling :) You mean for working around problems with CPU page table updates from page faults, to force all such updates through SDMA? Regards, Felix > > Christian. > >> It looks good to me. I haven't reviewed all the moved SDMA >> update code to make sure it all works correctly, but at least the >> prepare and commit functions look sane to me. >> >> For this patch I have a suggestion (inline) to remove params->ib, which >> seems redundant with params->job. That would also ripple through the >> remaining patches. >> >> Other than that, the series is Reviewed-by: Felix Kuehling >> <Felix.Kuehling@xxxxxxx> >> >> [+Kent], Look out for this patch series in an upcoming merge to >> amd-kfd-staging. I don't think it'll cause conflicts, but has a risk of >> regressions (like all big amdgpu_vm changes IME). >> >> Regards, >> Felix >> >> On 3/19/2019 8:44 AM, Christian König wrote: >>> Separate out all functions for SDMA and CPU based page table >>> updates into separate backends. >>> >>> This way we can keep most of the complexity of those from the >>> core VM code. >>> >>> Signed-off-by: Christian König <christian.koenig@xxxxxxx> >>> --- >>> drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 +- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 30 ++- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c | 116 +++++++++ >>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 248 >>> ++++++++++++++++++++ >>> 5 files changed, 401 insertions(+), 3 deletions(-) >>> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c >>> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c >> [snip] >>> +/** >>> + * amdgpu_vm_sdma_prepare - prepare SDMA command submission >>> + * >>> + * @p: see amdgpu_vm_update_params definition >>> + * @owner: owner we need to sync to >>> + * @exclusive: exclusive move fence we need to sync to >>> + * >>> + * Returns: >>> + * Negativ errno, 0 for success. >>> + */ >>> +static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, >>> + void *owner, struct dma_fence *exclusive) >>> +{ >>> + struct amdgpu_bo *root = p->vm->root.base.bo; >>> + unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW; >>> + int r; >>> + >>> + r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job); >>> + if (r) >>> + return r; >>> + >>> + r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false); >>> + if (r) >>> + return r; >>> + >>> + r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv, >>> + owner, false); >>> + if (r) >>> + return r; >>> + >>> + p->num_dw_left = ndw; >>> + p->ib = &p->job->ibs[0]; >> With p->job added, do we still need p->ib? We could just use >> &p->job->ibs[0] directly, which should perform the same or be more >> efficient since it's just a constant offset from p->job. >> >> >>> + return 0; >>> +} >>> + >>> +/** >>> + * amdgpu_vm_sdma_commit - commit SDMA command submission >>> + * >>> + * @p: see amdgpu_vm_update_params definition >>> + * @fence: resulting fence >>> + * >>> + * Returns: >>> + * Negativ errno, 0 for success. >>> + */ >>> +static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, >>> + struct dma_fence **fence) >>> +{ >>> + struct amdgpu_bo *root = p->vm->root.base.bo; >>> + struct amdgpu_ring *ring; >>> + struct dma_fence *f; >>> + int r; >>> + >>> + ring = container_of(p->vm->entity.rq->sched, struct >>> amdgpu_ring, sched); >>> + >>> + WARN_ON(p->ib->length_dw == 0); >>> + amdgpu_ring_pad_ib(ring, p->ib); >>> + WARN_ON(p->ib->length_dw > p->num_dw_left); >>> + r = amdgpu_job_submit(p->job, &p->vm->entity, >>> + AMDGPU_FENCE_OWNER_VM, &f); >>> + if (r) >>> + goto error; >>> + >>> + amdgpu_bo_fence(root, f, true); >>> + if (fence) >>> + swap(*fence, f); >>> + dma_fence_put(f); >>> + return 0; >>> + >>> +error: >>> + amdgpu_job_free(p->job); >>> + return r; >>> +} >>> + >>> + >>> +/** >>> + * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping >>> + * >>> + * @p: see amdgpu_vm_update_params definition >>> + * @bo: PD/PT to update >>> + * @pe: addr of the page entry >>> + * @count: number of page entries to copy >>> + * >>> + * Traces the parameters and calls the DMA function to copy the PTEs. >>> + */ >>> +static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params >>> *p, >>> + struct amdgpu_bo *bo, uint64_t pe, >>> + unsigned count) >>> +{ >>> + uint64_t src = p->ib->gpu_addr; >>> + >>> + src += p->num_dw_left * 4; >>> + >>> + pe += amdgpu_bo_gpu_offset(bo); >>> + trace_amdgpu_vm_copy_ptes(pe, src, count); >>> + >>> + amdgpu_vm_copy_pte(p->adev, p->ib, pe, src, count); >>> +} >>> + >>> +/** >>> + * amdgpu_vm_sdma_set_ptes - helper to call the right asic function >>> + * >>> + * @p: see amdgpu_vm_update_params definition >>> + * @bo: PD/PT to update >>> + * @pe: addr of the page entry >>> + * @addr: dst addr to write into pe >>> + * @count: number of page entries to update >>> + * @incr: increase next addr by incr bytes >>> + * @flags: hw access flags >>> + * >>> + * Traces the parameters and calls the right asic functions >>> + * to setup the page table using the DMA. >>> + */ >>> +static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p, >>> + struct amdgpu_bo *bo, uint64_t pe, >>> + uint64_t addr, unsigned count, >>> + uint32_t incr, uint64_t flags) >>> +{ >>> + pe += amdgpu_bo_gpu_offset(bo); >>> + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); >>> + if (count < 3) { >>> + amdgpu_vm_write_pte(p->adev, p->ib, pe, addr | flags, >>> + count, incr); >>> + } else { >>> + amdgpu_vm_set_pte_pde(p->adev, p->ib, pe, addr, >>> + count, incr, flags); >>> + } >>> +} >>> + >>> +/** >>> + * amdgpu_vm_sdma_update - execute VM update >>> + * >>> + * @p: see amdgpu_vm_update_params definition >>> + * @bo: PD/PT to update >>> + * @pe: addr of the page entry >>> + * @addr: dst addr to write into pe >>> + * @count: number of page entries to update >>> + * @incr: increase next addr by incr bytes >>> + * @flags: hw access flags >>> + * >>> + * Reserve space in the IB, setup mapping buffer on demand and >>> write commands to >>> + * the IB. >>> + */ >>> +static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, >>> + struct amdgpu_bo *bo, uint64_t pe, >>> + uint64_t addr, unsigned count, uint32_t incr, >>> + uint64_t flags) >>> +{ >>> + unsigned int i, ndw, nptes; >>> + uint64_t *pte; >>> + int r; >>> + >>> + do { >>> + ndw = p->num_dw_left; >>> + ndw -= p->ib->length_dw; >>> + >>> + if (ndw < 32) { >>> + r = amdgpu_vm_sdma_commit(p, NULL); >>> + if (r) >>> + return r; >>> + >>> + /* estimate how many dw we need */ >>> + ndw = 32; >>> + if (p->pages_addr) >>> + ndw += count * 2; >>> + ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW); >>> + ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW); >>> + >>> + r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job); >>> + if (r) >>> + return r; >>> + >>> + p->num_dw_left = ndw; >>> + p->ib = &p->job->ibs[0]; >>> + } >>> + >>> + if (!p->pages_addr) { >>> + /* set page commands needed */ >>> + if (bo->shadow) >>> + amdgpu_vm_sdma_set_ptes(p, bo->shadow, pe, addr, >>> + count, incr, flags); >>> + amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count, >>> + incr, flags); >>> + return 0; >>> + } >>> + >>> + /* copy commands needed */ >>> + ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw * >>> + (bo->shadow ? 2 : 1); >>> + >>> + /* for padding */ >>> + ndw -= 7; >>> + >>> + nptes = min(count, ndw / 2); >>> + >>> + /* Put the PTEs at the end of the IB. */ >>> + p->num_dw_left -= nptes * 2; >>> + pte = (uint64_t *)&(p->ib->ptr[p->num_dw_left]); >>> + for (i = 0; i < nptes; ++i, addr += incr) { >>> + pte[i] = amdgpu_vm_map_gart(p->pages_addr, addr); >>> + pte[i] |= flags; >>> + } >>> + >>> + if (bo->shadow) >>> + amdgpu_vm_sdma_copy_ptes(p, bo->shadow, pe, nptes); >>> + amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes); >>> + >>> + pe += nptes * 8; >>> + count -= nptes; >>> + } while (count); >>> + >>> + return 0; >>> +} >>> + >>> +const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs = { >>> + .prepare = amdgpu_vm_sdma_prepare, >>> + .update = amdgpu_vm_sdma_update, >>> + .commit = amdgpu_vm_sdma_commit >>> +}; > _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx