issue: Game F1 performance drops 13% when per vm bo is enabled. root cause: if some BOs are fallback to allowed domain, they will never be validated if no eviction happens, that means they always exist in allowed domain. Fix: maintain a per vm allowed domain BOs list, then try to validated them with perferred domain. Change-Id: I4335470bf867b46ac93c8e2531eac3f8ba9ac2da Signed-off-by: Chunming Zhou <david1.zhou at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 15 +++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 49 ++++++++++++++++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 7 ++++- 3 files changed, 63 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 383bf2d31c92..7509b6bd2047 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -359,7 +359,7 @@ void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, } static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, - struct amdgpu_bo *bo) + struct amdgpu_bo *bo, bool *allowed) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_operation_ctx ctx = { @@ -374,6 +374,8 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, if (bo->pin_count) return 0; + if (allowed) + *allowed = false; /* Don't move this buffer if we have depleted our allowance * to move it. Don't move anything if the threshold is zero. */ @@ -396,6 +398,9 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, } retry: + if (domain != bo->preferred_domains && domain == bo->allowed_domains && + allowed) + *allowed = true; amdgpu_ttm_placement_from_domain(bo, domain); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); @@ -479,19 +484,19 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, return false; } -static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo) +static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo, bool *allowed) { struct amdgpu_cs_parser *p = param; int r; do { - r = amdgpu_cs_bo_validate(p, bo); + r = amdgpu_cs_bo_validate(p, bo, allowed); } while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo)); if (r) return r; if (bo->shadow) - r = amdgpu_cs_bo_validate(p, bo->shadow); + r = amdgpu_cs_bo_validate(p, bo->shadow, NULL); return r; } @@ -528,7 +533,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, if (p->evictable == lobj) p->evictable = NULL; - r = amdgpu_cs_validate(p, bo); + r = amdgpu_cs_validate(p, bo, NULL); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e9a41dd05345..365e8dca05f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -186,6 +186,35 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, list_add(&entry->tv.head, validated); } +static int amdgpu_vm_try_validate_allowed(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + int (*validate)(void *p, + struct amdgpu_bo *bo, + bool *allowed), + void *param) +{ + struct amdgpu_vm_bo_base *bo_base, *tmp; + int r; + bool allowed; + + spin_lock(&vm->status_lock); + list_for_each_entry_safe(bo_base, tmp, &vm->allowed_domain, + allowed_domain_list) { + spin_unlock(&vm->status_lock); + r = validate(param, bo_base->bo, &allowed); + if (r) + return r; + spin_lock(&vm->status_lock); + if (!allowed) + list_del_init(&bo_base->allowed_domain_list); + if (bo_base->bo->tbo.type != ttm_bo_type_kernel) + list_move(&bo_base->vm_status, &vm->moved); + else + list_move(&bo_base->vm_status, &vm->relocated); + } + spin_unlock(&vm->status_lock); + return 0; +} /** * amdgpu_vm_validate_pt_bos - validate the page table BOs * @@ -197,16 +226,19 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, * Validate the page table BOs on command submission if neccessary. */ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int (*validate)(void *p, struct amdgpu_bo *bo), + int (*validate)(void *p, struct amdgpu_bo *bo, + bool *allowed), void *param) { struct ttm_bo_global *glob = adev->mman.bdev.glob; + LIST_HEAD(tmp_allowed); int r; spin_lock(&vm->status_lock); while (!list_empty(&vm->evicted)) { struct amdgpu_vm_bo_base *bo_base; struct amdgpu_bo *bo; + bool allowed = false; bo_base = list_first_entry(&vm->evicted, struct amdgpu_vm_bo_base, @@ -216,7 +248,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, bo = bo_base->bo; BUG_ON(!bo); if (bo->parent) { - r = validate(param, bo); + r = validate(param, bo, &allowed); if (r) return r; @@ -235,6 +267,10 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, } spin_lock(&vm->status_lock); + if (allowed) + list_add_tail(&bo_base->allowed_domain_list, + &tmp_allowed); + if (bo->tbo.type != ttm_bo_type_kernel) list_move(&bo_base->vm_status, &vm->moved); else @@ -242,6 +278,12 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, } spin_unlock(&vm->status_lock); + r = amdgpu_vm_try_validate_allowed(adev, vm, validate, param); + if (r) + return r; + spin_lock(&vm->status_lock); + list_splice_init(&tmp_allowed, &vm->allowed_domain); + spin_unlock(&vm->status_lock); return 0; } @@ -1868,6 +1910,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, bo_va->base.bo = bo; INIT_LIST_HEAD(&bo_va->base.bo_list); INIT_LIST_HEAD(&bo_va->base.vm_status); + INIT_LIST_HEAD(&bo_va->base.allowed_domain_list); bo_va->ref_count = 1; INIT_LIST_HEAD(&bo_va->valids); @@ -2237,6 +2280,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, spin_lock(&vm->status_lock); list_del(&bo_va->base.vm_status); + list_del(&bo_va->base.allowed_domain_list); spin_unlock(&vm->status_lock); list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { @@ -2409,6 +2453,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) vm->reserved_vmid[i] = NULL; spin_lock_init(&vm->status_lock); + INIT_LIST_HEAD(&vm->allowed_domain); INIT_LIST_HEAD(&vm->evicted); INIT_LIST_HEAD(&vm->relocated); INIT_LIST_HEAD(&vm->moved); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index cf2c667ee538..54c39d3ea6d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -149,6 +149,7 @@ struct amdgpu_vm_bo_base { /* protected by spinlock */ struct list_head vm_status; + struct list_head allowed_domain_list; /* protected by the BO being reserved */ bool moved; @@ -177,6 +178,9 @@ struct amdgpu_vm { /* protecting invalidated */ spinlock_t status_lock; + /* per vm bo is validated to allowed domain */ + struct list_head allowed_domain; + /* BOs who needs a validation */ struct list_head evicted; @@ -266,7 +270,8 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, struct amdgpu_bo_list_entry *entry); bool amdgpu_vm_ready(struct amdgpu_vm *vm); int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int (*callback)(void *p, struct amdgpu_bo *bo), + int (*callback)(void *p, struct amdgpu_bo *bo, + bool *allowed), void *param); int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, -- 2.14.1