On 13.09.2018 at 00:58, Felix Kuehling wrote:
> Is the small reduction in memory footprint (8 bytes per page table on a
> 64-bit kernel) really worth the trouble of open-coding a single-linked
> list implementation?

Well, the key point is that the size of amdgpu_vm_pt is now a power of two
again. So we no longer waste 28KB per page table (on 4 levels) when the
entries allocation gets rounded up to the next order size :)
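Rough back-of-the-envelope sketch of that, just to make the rounding
visible; the 512 entries per level and the old 72-byte entry size are
assumptions for illustration, not taken from the patch:

#include <stdio.h>

/* Round an allocation size up to the next power of two, the way a
 * power-of-two slab allocator would. */
static unsigned long round_up_pow2(unsigned long x)
{
        unsigned long r = 1;

        while (r < x)
                r <<= 1;
        return r;
}

int main(void)
{
        const unsigned long entries = 512;
        unsigned long before = entries * 72;    /* 36864 bytes */
        unsigned long after  = entries * 64;    /* 32768 bytes, already a power of two */

        /* prints 28672 (~28KB) wasted before, 0 wasted after */
        printf("before: %lu wasted\n", round_up_pow2(before) - before);
        printf("after:  %lu wasted\n", round_up_pow2(after) - after);
        return 0;
}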
> I guess this change makes a bigger difference for
> 2-level page tables than it does for 4-level, because the amdgpu_vm_pt
> array is allocated at the page directory level and includes page tables
> that don't even exist yet and may never exist. The amount of memory you
> save is the same as the size of the page directory.
>
> I wonder if the overhead could be reduced more effectively by allocating
> struct amdgpu_vm_pt with the page table, rather than with the page
> directory. Then the amdgpu_vm_pt.entries array would be an array of
> pointers instead. It could be an array[0] at the end of the structure,
> since the number of entries is known when the page directory is
> allocated. The BO could also be embedded in the amdgpu_vm_pt structure
> so it doesn't need to be a separate allocation from the amdgpu_vm_pt.

Yeah, I thought about that as well, but the change looked too invasive at
first glance. Roughly, something like the sketch below.
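Very rough sketch only, with stand-in toy types so it compiles on its own;
all names (toy_vm_pt, toy_bo, toy_vm_pt_alloc) are made up, and the real
thing would of course use amdgpu_vm_bo_base, amdgpu_bo and friends. The
point is the embedded BO plus a trailing child pointer array that is sized
when the directory level is allocated:

#include <stdlib.h>

struct toy_bo { int placeholder; };     /* stands in for the embedded struct amdgpu_bo */

struct toy_vm_pt {
        struct toy_bo    bo;            /* embedded, not a separate allocation */
        unsigned int     num_entries;   /* known when the page directory is allocated */
        struct toy_vm_pt *entries[];    /* flexible array of child pointers at the end */
};

static struct toy_vm_pt *toy_vm_pt_alloc(unsigned int num_entries)
{
        struct toy_vm_pt *pt;

        /* one allocation covers the entry and its child pointer array */
        pt = calloc(1, sizeof(*pt) + num_entries * sizeof(pt->entries[0]));
        if (pt)
                pt->num_entries = num_entries;
        return pt;
}

int main(void)
{
        struct toy_vm_pt *pd = toy_vm_pt_alloc(512);    /* e.g. 512 children per level */

        free(pd);
        return 0;
}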
> Acked-by: Felix Kuehling <Felix.Kuehling at amd.com>

Thanks,
Christian.

>
> Regards,
>   Felix
>
>
> On 2018-09-12 04:55 AM, Christian König wrote:
>> Instead of the double linked list. Gets the size of amdgpu_vm_pt down to
>> 64 bytes again.
>>
>> We could even reduce it down to 32 bytes, but that would require some
>> rather extreme hacks.
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  2 +-
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  4 ++--
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 38 ++++++++++++++++++++----------
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  2 +-
>>  4 files changed, 29 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index de990bdcdd6c..e6909252aefa 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -448,7 +448,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
>>                  return -ENOMEM;
>>          drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
>>          INIT_LIST_HEAD(&bo->shadow_list);
>> -        INIT_LIST_HEAD(&bo->va);
>> +        bo->vm_bo = NULL;
>>          bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
>>                  bp->domain;
>>          bo->allowed_domains = bo->preferred_domains;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> index 907fdf46d895..64337ff2ad63 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> @@ -89,8 +89,8 @@ struct amdgpu_bo {
>>          void                            *metadata;
>>          u32                             metadata_size;
>>          unsigned                        prime_shared_count;
>> -        /* list of all virtual address to which this bo is associated to */
>> -        struct list_head                va;
>> +        /* per VM structure for page tables and with virtual addresses */
>> +        struct amdgpu_vm_bo_base        *vm_bo;
>>          /* Constant after initialization */
>>          struct drm_gem_object           gem_base;
>>          struct amdgpu_bo                *parent;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index cb6a5114128e..fb6b16273c54 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -309,12 +309,13 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>  {
>>          base->vm = vm;
>>          base->bo = bo;
>> -        INIT_LIST_HEAD(&base->bo_list);
>> +        base->next = NULL;
>>          INIT_LIST_HEAD(&base->vm_status);
>>
>>          if (!bo)
>>                  return;
>> -        list_add_tail(&base->bo_list, &bo->va);
>> +        base->next = bo->vm_bo;
>> +        bo->vm_bo = base;
>>
>>          if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
>>                  return;
>> @@ -352,7 +353,7 @@ static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt)
>>          if (!parent)
>>                  return NULL;
>>
>> -        return list_first_entry(&parent->va, struct amdgpu_vm_pt, base.bo_list);
>> +        return container_of(parent->vm_bo, struct amdgpu_vm_pt, base);
>>  }
>>
>>  /**
>> @@ -954,7 +955,7 @@ static void amdgpu_vm_free_pts(struct amdgpu_device *adev,
>>          for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) {
>>
>>                  if (entry->base.bo) {
>> -                        list_del(&entry->base.bo_list);
>> +                        entry->base.bo->vm_bo = NULL;
>>                          list_del(&entry->base.vm_status);
>>                          amdgpu_bo_unref(&entry->base.bo->shadow);
>>                          amdgpu_bo_unref(&entry->base.bo);
>> @@ -1162,12 +1163,13 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
>>  struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
>>                                         struct amdgpu_bo *bo)
>>  {
>> -        struct amdgpu_bo_va *bo_va;
>> +        struct amdgpu_vm_bo_base *base;
>>
>> -        list_for_each_entry(bo_va, &bo->va, base.bo_list) {
>> -                if (bo_va->base.vm == vm) {
>> -                        return bo_va;
>> -                }
>> +        for (base = bo->vm_bo; base; base = base->next) {
>> +                if (base->vm != vm)
>> +                        continue;
>> +
>> +                return container_of(base, struct amdgpu_bo_va, base);
>>          }
>>          return NULL;
>>  }
>> @@ -2728,11 +2730,21 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
>>          struct amdgpu_bo_va_mapping *mapping, *next;
>>          struct amdgpu_bo *bo = bo_va->base.bo;
>>          struct amdgpu_vm *vm = bo_va->base.vm;
>> +        struct amdgpu_vm_bo_base **base;
>>
>> -        if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv)
>> -                vm->bulk_moveable = false;
>> +        if (bo) {
>> +                if (bo->tbo.resv == vm->root.base.bo->tbo.resv)
>> +                        vm->bulk_moveable = false;
>>
>> -        list_del(&bo_va->base.bo_list);
>> +                for (base = &bo_va->base.bo->vm_bo; *base;
>> +                     base = &(*base)->next) {
>> +                        if (*base != &bo_va->base)
>> +                                continue;
>> +
>> +                        *base = bo_va->base.next;
>> +                        break;
>> +                }
>> +        }
>>
>>          spin_lock(&vm->invalidated_lock);
>>          list_del(&bo_va->base.vm_status);
>> @@ -2774,7 +2786,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>          if (bo->parent && bo->parent->shadow == bo)
>>                  bo = bo->parent;
>>
>> -        list_for_each_entry(bo_base, &bo->va, bo_list) {
>> +        for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
>>                  struct amdgpu_vm *vm = bo_base->vm;
>>
>>                  if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index e275ee7c1bc1..8966e40767eb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -128,7 +128,7 @@ struct amdgpu_vm_bo_base {
>>          struct amdgpu_bo                *bo;
>>
>>          /* protected by bo being reserved */
>> -        struct list_head                bo_list;
>> +        struct amdgpu_vm_bo_base        *next;
>>
>>          /* protected by spinlock */
>>          struct list_head                vm_status;
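For reference, the open-coded list pattern from the patch in isolation, as
stand-alone toy code with made-up names (struct node and friends): insert
at the head like amdgpu_vm_bo_base_init(), walk via ->next like
amdgpu_vm_bo_find(), and unlink through a pointer-to-pointer like the loop
in amdgpu_vm_bo_rmv(), so no prev pointer or list_head is needed:

#include <stdio.h>

struct node {
        int id;
        struct node *next;
};

static void node_insert(struct node **head, struct node *n)
{
        n->next = *head;        /* same head-insert as amdgpu_vm_bo_base_init() */
        *head = n;
}

static void node_remove(struct node **head, struct node *n)
{
        struct node **p;

        /* same pointer-to-pointer unlink as the loop in amdgpu_vm_bo_rmv() */
        for (p = head; *p; p = &(*p)->next) {
                if (*p != n)
                        continue;

                *p = n->next;
                break;
        }
}

int main(void)
{
        struct node a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };
        struct node *head = NULL, *it;

        node_insert(&head, &a);
        node_insert(&head, &b);
        node_insert(&head, &c);
        node_remove(&head, &b);

        for (it = head; it; it = it->next)      /* walk, as amdgpu_vm_bo_find() does */
                printf("%d\n", it->id);         /* prints 3, then 1 */
        return 0;
}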