On 19.03.2018 at 11:00, Chunming Zhou wrote:
> issue:
> Game F1 performance drops 13% when per vm bo is enabled.
>
> root cause:
> if some BOs fall back to the allowed domain, they will never be validated again if no eviction happens,
> that means they always stay in the allowed domain.
>
> Fix:
> maintain a per vm allowed domain BO list, then try to validate them with the preferred domain.

The idea sounds good, but I find the implementation a bit overkill.

Give me a second to hack something together,
Christian.

>
> Change-Id: I4335470bf867b46ac93c8e2531eac3f8ba9ac2da
> Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 15 +++++++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 49 ++++++++++++++++++++++++++++++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  7 ++++-
>  3 files changed, 63 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 383bf2d31c92..7509b6bd2047 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -359,7 +359,7 @@ void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
>  }
>
>  static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
> -                                 struct amdgpu_bo *bo)
> +                                 struct amdgpu_bo *bo, bool *allowed)
>  {
>          struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>          struct ttm_operation_ctx ctx = {
> @@ -374,6 +374,8 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
>          if (bo->pin_count)
>                  return 0;
>
> +        if (allowed)
> +                *allowed = false;
>          /* Don't move this buffer if we have depleted our allowance
>           * to move it. Don't move anything if the threshold is zero.
>           */
> @@ -396,6 +398,9 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
>          }
>
>  retry:
> +        if (domain != bo->preferred_domains && domain == bo->allowed_domains &&
> +            allowed)
> +                *allowed = true;
>          amdgpu_ttm_placement_from_domain(bo, domain);
>          r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>
> @@ -479,19 +484,19 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
>          return false;
>  }
>
> -static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
> +static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo, bool *allowed)
>  {
>          struct amdgpu_cs_parser *p = param;
>          int r;
>
>          do {
> -                r = amdgpu_cs_bo_validate(p, bo);
> +                r = amdgpu_cs_bo_validate(p, bo, allowed);
>          } while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
>          if (r)
>                  return r;
>
>          if (bo->shadow)
> -                r = amdgpu_cs_bo_validate(p, bo->shadow);
> +                r = amdgpu_cs_bo_validate(p, bo->shadow, NULL);
>
>          return r;
>  }
> @@ -528,7 +533,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
>                  if (p->evictable == lobj)
>                          p->evictable = NULL;
>
> -                r = amdgpu_cs_validate(p, bo);
> +                r = amdgpu_cs_validate(p, bo, NULL);
>                  if (r)
>                          return r;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index e9a41dd05345..365e8dca05f5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -186,6 +186,35 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>          list_add(&entry->tv.head, validated);
>  }
>
> +static int amdgpu_vm_try_validate_allowed(struct amdgpu_device *adev,
> +                                          struct amdgpu_vm *vm,
> +                                          int (*validate)(void *p,
> +                                                          struct amdgpu_bo *bo,
> +                                                          bool *allowed),
> +                                          void *param)
> +{
> +        struct amdgpu_vm_bo_base *bo_base, *tmp;
> +        int r;
> +        bool allowed;
> +
> +        spin_lock(&vm->status_lock);
> +        list_for_each_entry_safe(bo_base, tmp, &vm->allowed_domain,
> +                                 allowed_domain_list) {
> +                spin_unlock(&vm->status_lock);
> +                r = validate(param, bo_base->bo, &allowed);
> +                if (r)
> +                        return r;
> +                spin_lock(&vm->status_lock);
> +                if (!allowed)
> +                        list_del_init(&bo_base->allowed_domain_list);
> +                if (bo_base->bo->tbo.type != ttm_bo_type_kernel)
> +                        list_move(&bo_base->vm_status, &vm->moved);
> +                else
> +                        list_move(&bo_base->vm_status, &vm->relocated);
> +        }
> +        spin_unlock(&vm->status_lock);
> +        return 0;
> +}
>  /**
>   * amdgpu_vm_validate_pt_bos - validate the page table BOs
>   *
> @@ -197,16 +226,19 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>   * Validate the page table BOs on command submission if neccessary.
>   */
>  int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> -                              int (*validate)(void *p, struct amdgpu_bo *bo),
> +                              int (*validate)(void *p, struct amdgpu_bo *bo,
> +                                              bool *allowed),
>                                void *param)
>  {
>          struct ttm_bo_global *glob = adev->mman.bdev.glob;
> +        LIST_HEAD(tmp_allowed);
>          int r;
>
>          spin_lock(&vm->status_lock);
>          while (!list_empty(&vm->evicted)) {
>                  struct amdgpu_vm_bo_base *bo_base;
>                  struct amdgpu_bo *bo;
> +                bool allowed = false;
>
>                  bo_base = list_first_entry(&vm->evicted,
>                                             struct amdgpu_vm_bo_base,
> @@ -216,7 +248,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>                  bo = bo_base->bo;
>                  BUG_ON(!bo);
>                  if (bo->parent) {
> -                        r = validate(param, bo);
> +                        r = validate(param, bo, &allowed);
>                          if (r)
>                                  return r;
>
> @@ -235,6 +267,10 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>                  }
>
>                  spin_lock(&vm->status_lock);
> +                if (allowed)
> +                        list_add_tail(&bo_base->allowed_domain_list,
> +                                      &tmp_allowed);
> +
>                  if (bo->tbo.type != ttm_bo_type_kernel)
>                          list_move(&bo_base->vm_status, &vm->moved);
>                  else
> @@ -242,6 +278,12 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>          }
>          spin_unlock(&vm->status_lock);
>
> +        r = amdgpu_vm_try_validate_allowed(adev, vm, validate, param);
> +        if (r)
> +                return r;
> +        spin_lock(&vm->status_lock);
> +        list_splice_init(&tmp_allowed, &vm->allowed_domain);
> +        spin_unlock(&vm->status_lock);
>          return 0;
>  }
>
> @@ -1868,6 +1910,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
>          bo_va->base.bo = bo;
>          INIT_LIST_HEAD(&bo_va->base.bo_list);
>          INIT_LIST_HEAD(&bo_va->base.vm_status);
> +        INIT_LIST_HEAD(&bo_va->base.allowed_domain_list);
>
>          bo_va->ref_count = 1;
>          INIT_LIST_HEAD(&bo_va->valids);
> @@ -2237,6 +2280,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
>
>          spin_lock(&vm->status_lock);
>          list_del(&bo_va->base.vm_status);
> +        list_del(&bo_va->base.allowed_domain_list);
>          spin_unlock(&vm->status_lock);
>
>          list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
> @@ -2409,6 +2453,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>          for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
>                  vm->reserved_vmid[i] = NULL;
>          spin_lock_init(&vm->status_lock);
> +        INIT_LIST_HEAD(&vm->allowed_domain);
>          INIT_LIST_HEAD(&vm->evicted);
>          INIT_LIST_HEAD(&vm->relocated);
>          INIT_LIST_HEAD(&vm->moved);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index cf2c667ee538..54c39d3ea6d8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -149,6 +149,7 @@ struct amdgpu_vm_bo_base {
>
>          /* protected by spinlock */
>          struct list_head vm_status;
> +        struct list_head allowed_domain_list;
>
>          /* protected by the BO being reserved */
>          bool moved;
> @@ -177,6 +178,9 @@ struct amdgpu_vm {
>          /* protecting invalidated */
>          spinlock_t status_lock;
>
> +        /* per vm bo is validated to allowed domain */
> +        struct list_head allowed_domain;
> +
>          /* BOs who needs a validation */
>          struct list_head evicted;
>
> @@ -266,7 +270,8 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>                           struct amdgpu_bo_list_entry *entry);
>  bool amdgpu_vm_ready(struct amdgpu_vm *vm);
>  int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> -                              int (*callback)(void *p, struct amdgpu_bo *bo),
> +                              int (*callback)(void *p, struct amdgpu_bo *bo,
> +                                              bool *allowed),
>                                void *param);
>  int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>                          struct amdgpu_vm *vm,
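For anyone trying to follow the idea without the rest of the kernel tree, here is a minimal user-space sketch of what the commit message describes: BOs that could only be placed in their allowed (fallback) domain are remembered on a per-VM list and retried against their preferred domain on the next submission. The types and helpers below (struct bo, struct vm, validate(), revalidate_allowed_only()) are simplified stand-ins invented for illustration, not the amdgpu API, and the memory-pressure decision is faked with a single flag.

/*
 * Simplified user-space model of the idea above, not the amdgpu code:
 * BOs that could only be validated into their allowed (fallback) domain
 * are kept on a per-VM list and retried against their preferred domain
 * on the next command submission.  All types here are made-up stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>

enum domain { DOMAIN_GTT, DOMAIN_VRAM };

struct bo {
        const char *name;
        enum domain preferred;  /* where the BO should ideally live */
        enum domain current;    /* where it actually ended up */
        struct bo *next;        /* link on the per-VM fallback list */
};

struct vm {
        struct bo *allowed_only;  /* BOs stuck in their fallback domain */
};

/* Fake validation: placement in VRAM succeeds only when there is room. */
static bool validate(struct bo *bo, bool vram_has_room)
{
        if (bo->preferred == DOMAIN_VRAM && !vram_has_room) {
                bo->current = DOMAIN_GTT;  /* fall back to the allowed domain */
                return false;              /* caller should remember this BO */
        }
        bo->current = bo->preferred;
        return true;
}

/* On each submission, retry every BO still stuck in its fallback domain. */
static void revalidate_allowed_only(struct vm *vm, bool vram_has_room)
{
        struct bo **link = &vm->allowed_only;

        while (*link) {
                struct bo *bo = *link;

                if (validate(bo, vram_has_room)) {
                        *link = bo->next;  /* promoted, drop it from the list */
                        bo->next = NULL;
                } else {
                        link = &bo->next;  /* still stuck, keep for next time */
                }
        }
}

int main(void)
{
        struct bo tex = { "texture", DOMAIN_VRAM, DOMAIN_GTT, NULL };
        struct vm vm = { NULL };

        /* First submission: VRAM is full, the BO falls back and gets tracked. */
        if (!validate(&tex, false)) {
                tex.next = vm.allowed_only;
                vm.allowed_only = &tex;
        }
        printf("after CS 1: %s is in %s\n", tex.name,
               tex.current == DOMAIN_VRAM ? "VRAM" : "GTT");

        /* Later submission: room freed up, the retry promotes the BO to VRAM. */
        revalidate_allowed_only(&vm, true);
        printf("after CS 2: %s is in %s\n", tex.name,
               tex.current == DOMAIN_VRAM ? "VRAM" : "GTT");
        return 0;
}

Built as a plain C99 program, this prints the BO first in GTT and then in VRAM after the retry, which mirrors the behaviour the patch aims to restore for per-VM BOs once no eviction forces a revalidation.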