As far as I can tell, the whole series is a small cleanup and big refactor to enable CPU clearing of PTs without a lot of ugliness or code duplication. It looks good to me. I haven't reviewed all the moved SDMA update code to make sure it all works correctly, but at least the prepare and commit functions look sane to me. For this patch I have a suggestion (inline) to remove params->ib, which seems redundant with params->job. That would also ripple through the remaining patches. Other than that, the series is Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> [+Kent]. Look out for this patch series in an upcoming merge to amd-kfd-staging. I don't think it'll cause conflicts, but has a risk of regressions (like all big amdgpu_vm changes IME). Regards, Felix On 3/19/2019 8:44 AM, Christian König wrote: > Separate out all functions for SDMA and CPU based page table > updates into separate backends. > > This way we can keep most of the complexity of those from the > core VM code. > > Signed-off-by: Christian König <christian.koenig@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 30 ++- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c | 116 +++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 248 ++++++++++++++++++++ > 5 files changed, 401 insertions(+), 3 deletions(-) > create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c > create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c [snip] > +/** > + * amdgpu_vm_sdma_prepare - prepare SDMA command submission > + * > + * @p: see amdgpu_vm_update_params definition > + * @owner: owner we need to sync to > + * @exclusive: exclusive move fence we need to sync to > + * > + * Returns: > + * Negative errno, 0 for success. 
> + */ > +static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, > + void *owner, struct dma_fence *exclusive) > +{ > + struct amdgpu_bo *root = p->vm->root.base.bo; > + unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW; > + int r; > + > + r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job); > + if (r) > + return r; > + > + r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false); > + if (r) > + return r; > + > + r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv, > + owner, false); > + if (r) > + return r; > + > + p->num_dw_left = ndw; > + p->ib = &p->job->ibs[0]; With p->job added, do we still need p->ib? We could just use &p->job->ibs[0] directly, which should perform the same or be more efficient since it's just a constant offset from p->job. > + return 0; > +} > + > +/** > + * amdgpu_vm_sdma_commit - commit SDMA command submission > + * > + * @p: see amdgpu_vm_update_params definition > + * @fence: resulting fence > + * > + * Returns: > + * Negative errno, 0 for success. 
> + */ > +static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, > + struct dma_fence **fence) > +{ > + struct amdgpu_bo *root = p->vm->root.base.bo; > + struct amdgpu_ring *ring; > + struct dma_fence *f; > + int r; > + > + ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched); > + > + WARN_ON(p->ib->length_dw == 0); > + amdgpu_ring_pad_ib(ring, p->ib); > + WARN_ON(p->ib->length_dw > p->num_dw_left); > + r = amdgpu_job_submit(p->job, &p->vm->entity, > + AMDGPU_FENCE_OWNER_VM, &f); > + if (r) > + goto error; > + > + amdgpu_bo_fence(root, f, true); > + if (fence) > + swap(*fence, f); > + dma_fence_put(f); > + return 0; > + > +error: > + amdgpu_job_free(p->job); > + return r; > +} > + > + > +/** > + * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping > + * > + * @p: see amdgpu_vm_update_params definition > + * @bo: PD/PT to update > + * @pe: addr of the page entry > + * @count: number of page entries to copy > + * > + * Traces the parameters and calls the DMA function to copy the PTEs. > + */ > +static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p, > + struct amdgpu_bo *bo, uint64_t pe, > + unsigned count) > +{ > + uint64_t src = p->ib->gpu_addr; > + > + src += p->num_dw_left * 4; > + > + pe += amdgpu_bo_gpu_offset(bo); > + trace_amdgpu_vm_copy_ptes(pe, src, count); > + > + amdgpu_vm_copy_pte(p->adev, p->ib, pe, src, count); > +} > + > +/** > + * amdgpu_vm_sdma_set_ptes - helper to call the right asic function > + * > + * @p: see amdgpu_vm_update_params definition > + * @bo: PD/PT to update > + * @pe: addr of the page entry > + * @addr: dst addr to write into pe > + * @count: number of page entries to update > + * @incr: increase next addr by incr bytes > + * @flags: hw access flags > + * > + * Traces the parameters and calls the right asic functions > + * to setup the page table using the DMA. 
> + */ > +static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p, > + struct amdgpu_bo *bo, uint64_t pe, > + uint64_t addr, unsigned count, > + uint32_t incr, uint64_t flags) > +{ > + pe += amdgpu_bo_gpu_offset(bo); > + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); > + if (count < 3) { > + amdgpu_vm_write_pte(p->adev, p->ib, pe, addr | flags, > + count, incr); > + } else { > + amdgpu_vm_set_pte_pde(p->adev, p->ib, pe, addr, > + count, incr, flags); > + } > +} > + > +/** > + * amdgpu_vm_sdma_update - execute VM update > + * > + * @p: see amdgpu_vm_update_params definition > + * @bo: PD/PT to update > + * @pe: addr of the page entry > + * @addr: dst addr to write into pe > + * @count: number of page entries to update > + * @incr: increase next addr by incr bytes > + * @flags: hw access flags > + * > + * Reserve space in the IB, setup mapping buffer on demand and write commands to > + * the IB. > + */ > +static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, > + struct amdgpu_bo *bo, uint64_t pe, > + uint64_t addr, unsigned count, uint32_t incr, > + uint64_t flags) > +{ > + unsigned int i, ndw, nptes; > + uint64_t *pte; > + int r; > + > + do { > + ndw = p->num_dw_left; > + ndw -= p->ib->length_dw; > + > + if (ndw < 32) { > + r = amdgpu_vm_sdma_commit(p, NULL); > + if (r) > + return r; > + > + /* estimate how many dw we need */ > + ndw = 32; > + if (p->pages_addr) > + ndw += count * 2; > + ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW); > + ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW); > + > + r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job); > + if (r) > + return r; > + > + p->num_dw_left = ndw; > + p->ib = &p->job->ibs[0]; > + } > + > + if (!p->pages_addr) { > + /* set page commands needed */ > + if (bo->shadow) > + amdgpu_vm_sdma_set_ptes(p, bo->shadow, pe, addr, > + count, incr, flags); > + amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count, > + incr, flags); > + return 0; > + } > + > + /* copy commands needed */ > + ndw -= 
p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw * > + (bo->shadow ? 2 : 1); > + > + /* for padding */ > + ndw -= 7; > + > + nptes = min(count, ndw / 2); > + > + /* Put the PTEs at the end of the IB. */ > + p->num_dw_left -= nptes * 2; > + pte = (uint64_t *)&(p->ib->ptr[p->num_dw_left]); > + for (i = 0; i < nptes; ++i, addr += incr) { > + pte[i] = amdgpu_vm_map_gart(p->pages_addr, addr); > + pte[i] |= flags; > + } > + > + if (bo->shadow) > + amdgpu_vm_sdma_copy_ptes(p, bo->shadow, pe, nptes); > + amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes); > + > + pe += nptes * 8; > + count -= nptes; > + } while (count); > + > + return 0; > +} > + > +const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs = { > + .prepare = amdgpu_vm_sdma_prepare, > + .update = amdgpu_vm_sdma_update, > + .commit = amdgpu_vm_sdma_commit > +}; _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx