On 2017-05-17 05:02, Kasiviswanathan, Harish wrote:
>
> -----Original Message-----
> From: Zhou, David(ChunMing)
> Sent: Monday, May 15, 2017 10:50 PM
> To: Kasiviswanathan, Harish <Harish.Kasiviswanathan at amd.com>; amd-gfx at lists.freedesktop.org
> Subject: Re: [PATCH 4/5] drm/amdgpu: Support page directory update via CPU
>
> On 2017-05-16 05:32, Harish Kasiviswanathan wrote:
> > If the amdgpu.vm_update_context param is set to use the CPU, then page
> > directories will be updated by the CPU instead of the SDMA.
> >
> > Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan at amd.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 151 ++++++++++++++++++++++++---------
> >  1 file changed, 109 insertions(+), 42 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > index 9c89cb2..d72a624 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > @@ -271,6 +271,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
> >  				  uint64_t saddr, uint64_t eaddr,
> >  				  unsigned level)
> >  {
> > +	u64 flags;
> >  	unsigned shift = (adev->vm_manager.num_level - level) *
> >  		adev->vm_manager.block_size;
> >  	unsigned pt_idx, from, to;
> > @@ -299,6 +300,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
> >  	saddr = saddr & ((1 << shift) - 1);
> >  	eaddr = eaddr & ((1 << shift) - 1);
> >
> > +	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> > +		AMDGPU_GEM_CREATE_VRAM_CLEARED;
> > +	if (vm->use_cpu_for_update)
> > +		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> I think the shadow flag is needed for the CPU case as well; it is used
> to back up the VM BO and is meaningful on GPU reset.
> Same comment for the PD BO.
>
> [HK]: Yes, support for shadow BOs is desirable and could be implemented
> as a separate commit. To support shadow BOs, the caller should
> explicitly add the shadow BOs into ttm_eu_reserve_buffer(..) to remove
> them from the TTM swap list, or ttm_bo_kmap has to be modified. This
> implementation of CPU updates of VM page tables is mainly for KFD
> usage; graphics will use it only for experimental and testing purposes.
> From KFD's viewpoint shadow BOs are not useful, because if the GPU is
> reset all queue information is lost (submissions are done by user
> space) and it is not possible to recover.
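For reference, a minimal sketch of the reservation change [HK] describes
above: the shadow BO is added to the validation list next to its parent,
so ttm_eu_reserve_buffer() reserves both and the shadow cannot be swapped
out while it is kmapped. Untested, written against the 4.12-era TTM
structures; amdgpu_vm_add_bo_and_shadow is a hypothetical helper, not
part of this patch:

static void amdgpu_vm_add_bo_and_shadow(struct amdgpu_bo *bo,
					struct ttm_validate_buffer *tv,
					struct ttm_validate_buffer *tv_shadow,
					struct list_head *validated)
{
	/* reserve the real BO as usual */
	tv->bo = &bo->tbo;
	tv->shared = true;
	list_add(&tv->head, validated);

	/* and, when present, its shadow, so that
	 * ttm_eu_reserve_buffer() takes it off the swap LRU too */
	if (bo->shadow) {
		tv_shadow->bo = &bo->shadow->tbo;
		tv_shadow->shared = true;
		list_add(&tv_shadow->head, validated);
	}
}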
Either way is fine with me.

David Zhou

>
> Regards,
> David Zhou
> > +	else
> > +		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> > +				AMDGPU_GEM_CREATE_SHADOW);
> > +
> >  	/* walk over the address space and allocate the page tables */
> >  	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
> >  		struct reservation_object *resv = vm->root.bo->tbo.resv;
> > @@ -310,10 +319,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
> >  					     amdgpu_vm_bo_size(adev, level),
> >  					     AMDGPU_GPU_PAGE_SIZE, true,
> >  					     AMDGPU_GEM_DOMAIN_VRAM,
> > -					     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> > -					     AMDGPU_GEM_CREATE_SHADOW |
> > -					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> > -					     AMDGPU_GEM_CREATE_VRAM_CLEARED,
> > +					     flags,
> >  					     NULL, resv, &pt);
> >  		if (r)
> >  			return r;
> > @@ -952,6 +958,43 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
> >  	return result;
> >  }
> >
> > +/**
> > + * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
> > + *
> > + * @params: see amdgpu_pte_update_params definition
> > + * @pe: kmap addr of the page entry
> > + * @addr: dst addr to write into pe
> > + * @count: number of page entries to update
> > + * @incr: increase next addr by incr bytes
> > + * @flags: hw access flags
> > + */
> > +static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
> > +				   uint64_t pe, uint64_t addr,
> > +				   unsigned count, uint32_t incr,
> > +				   uint64_t flags)
> > +{
> > +	unsigned int i;
> > +
> > +	for (i = 0; i < count; i++) {
> > +		amdgpu_gart_set_pte_pde(params->adev, (void *)pe,
> > +					i, addr, flags);
> > +		addr += incr;
> > +	}
> > +
> > +	mb();
> > +	amdgpu_gart_flush_gpu_tlb(params->adev, 0);
> > +}
> > +
> > +static void amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct amdgpu_bo *bo)
> > +{
> > +	struct amdgpu_sync sync;
> > +
> > +	amdgpu_sync_create(&sync);
> > +	amdgpu_sync_resv(adev, &sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_VM);
> > +	amdgpu_sync_wait(&sync);
> > +	amdgpu_sync_free(&sync);
> > +}
> > +
> >  /*
> >   * amdgpu_vm_update_level - update a single level in the hierarchy
> >   *
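The two new helpers above carry the CPU path: amdgpu_vm_bo_wait() drains
every fence on the BO's reservation object, after which the entries can
be written directly through a kernel mapping. As an untested
illustration of how they combine in the hunk below (all names are from
the patch; addr, count and incr stand for values the real caller derives
while walking the directory):

	uint64_t pd_addr;

	/* map the page directory, then wait for in-flight GPU work on
	 * it before any CPU write */
	r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
	if (r)
		return r;
	amdgpu_vm_bo_wait(adev, parent->bo);

	/* write 'count' entries at the mapped address; the helper ends
	 * with a memory barrier and a GPU TLB flush */
	amdgpu_vm_cpu_set_ptes(&params, pd_addr, addr, count, incr,
			       AMDGPU_PTE_VALID);
	amdgpu_bo_kunmap(parent->bo);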
> > @@ -981,34 +1024,50 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
> >
> >  	if (!parent->entries)
> >  		return 0;
> > -	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
> >
> > -	/* padding, etc. */
> > -	ndw = 64;
> > +	memset(&params, 0, sizeof(params));
> > +	params.adev = adev;
> > +	shadow = parent->bo->shadow;
> >
> > -	/* assume the worst case */
> > -	ndw += parent->last_entry_used * 6;
> > +	WARN_ON(vm->use_cpu_for_update && shadow);
> > +	if (vm->use_cpu_for_update && !shadow) {
> > +		r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
> > +		if (r)
> > +			return r;
> > +		amdgpu_vm_bo_wait(adev, parent->bo);
> > +		params.func = amdgpu_vm_cpu_set_ptes;
> > +	} else {
> > +		if (shadow) {
> > +			r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
> > +			if (r)
> > +				return r;
> > +		}
> > +		ring = container_of(vm->entity.sched, struct amdgpu_ring,
> > +				    sched);
> >
> > -	pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> > +		/* padding, etc. */
> > +		ndw = 64;
> >
> > -	shadow = parent->bo->shadow;
> > -	if (shadow) {
> > -		r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
> > +		/* assume the worst case */
> > +		ndw += parent->last_entry_used * 6;
> > +
> > +		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> > +
> > +		if (shadow) {
> > +			shadow_addr = amdgpu_bo_gpu_offset(shadow);
> > +			ndw *= 2;
> > +		} else {
> > +			shadow_addr = 0;
> > +		}
> > +
> > +		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
> >  		if (r)
> >  			return r;
> > -		shadow_addr = amdgpu_bo_gpu_offset(shadow);
> > -		ndw *= 2;
> > -	} else {
> > -		shadow_addr = 0;
> > -	}
> >
> > -	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
> > -	if (r)
> > -		return r;
> > +		params.ib = &job->ibs[0];
> > +		params.func = amdgpu_vm_do_set_ptes;
> > +	}
> >
> > -	memset(&params, 0, sizeof(params));
> > -	params.adev = adev;
> > -	params.ib = &job->ibs[0];
> >
> >  	/* walk over the address space and update the directory */
> >  	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
> > @@ -1043,15 +1102,15 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
> >  			amdgpu_vm_adjust_mc_addr(adev, last_pt);
> >
> >  			if (shadow)
> > -				amdgpu_vm_do_set_ptes(&params,
> > -						      last_shadow,
> > -						      pt_addr, count,
> > -						      incr,
> > -						      AMDGPU_PTE_VALID);
> > -
> > -			amdgpu_vm_do_set_ptes(&params, last_pde,
> > -					      pt_addr, count, incr,
> > -					      AMDGPU_PTE_VALID);
> > +				params.func(&params,
> > +					    last_shadow,
> > +					    pt_addr, count,
> > +					    incr,
> > +					    AMDGPU_PTE_VALID);
> > +
> > +			params.func(&params, last_pde,
> > +				    pt_addr, count, incr,
> > +				    AMDGPU_PTE_VALID);
> >  		}
> >
> >  		count = 1;
> > @@ -1067,14 +1126,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
> >  		uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt);
> >
> >  		if (vm->root.bo->shadow)
> > -			amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr,
> > -					      count, incr, AMDGPU_PTE_VALID);
> > +			params.func(&params, last_shadow, pt_addr,
> > +				    count, incr, AMDGPU_PTE_VALID);
> >
> > -		amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr,
> > -				      count, incr, AMDGPU_PTE_VALID);
> > +		params.func(&params, last_pde, pt_addr,
> > +			    count, incr, AMDGPU_PTE_VALID);
> >  	}
> >
> > -	if (params.ib->length_dw == 0) {
> > +	if (params.func == amdgpu_vm_cpu_set_ptes)
> > +		amdgpu_bo_kunmap(parent->bo);
> > +	else if (params.ib->length_dw == 0) {
> >  		amdgpu_job_free(job);
> >  	} else {
> >  		amdgpu_ring_pad_ib(ring, params.ib);
> > @@ -2309,6 +2370,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> >  	struct amdgpu_ring *ring;
> >  	struct amd_sched_rq *rq;
> >  	int r, i;
> > +	u64 flags;
> >
> >  	vm->va = RB_ROOT;
> >  	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
> > @@ -2342,12 +2404,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> >  			  "CPU update of VM recommended only for large BAR system\n");
> >  	vm->last_dir_update = NULL;
> >
> > +	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> > +			AMDGPU_GEM_CREATE_VRAM_CLEARED;
> > +	if (vm->use_cpu_for_update)
> > +		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> > +	else
> > +		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> > +				AMDGPU_GEM_CREATE_SHADOW);
> > +
> >  	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
> >  			     AMDGPU_GEM_DOMAIN_VRAM,
> > -			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> > -			     AMDGPU_GEM_CREATE_SHADOW |
> > -			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> > -			     AMDGPU_GEM_CREATE_VRAM_CLEARED,
> > +			     flags,
> >  			     NULL, NULL, &vm->root.bo);
> >  	if (r)
> >  		goto error_free_sched_entity;
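One observation on the patch as a whole: the BO-creation flag selection
is now duplicated between amdgpu_vm_alloc_levels() and amdgpu_vm_init().
A helper along these lines (hypothetical and untested, not part of the
patch) would keep the two call sites in sync:

static u64 amdgpu_vm_bo_create_flags(const struct amdgpu_vm *vm)
{
	u64 flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
		    AMDGPU_GEM_CREATE_VRAM_CLEARED;

	if (vm->use_cpu_for_update)
		/* CPU updates need a CPU-accessible, kmap-able BO */
		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	else
		/* SDMA updates keep a shadow copy for GPU-reset recovery */
		flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
			 AMDGPU_GEM_CREATE_SHADOW;

	return flags;
}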