On 2022-01-18 at 17:50:34 +0000, Robert Beckett wrote:
> From: Matthew Auld <matthew.auld@xxxxxxxxx>
>
> For local-memory objects we need to align the GTT addresses
> to 64K, both for the ppgtt and ggtt.
>
> We need to support vm->min_alignment > 4K, depending
> on the vm itself and the type of object we are inserting.
> With this in mind update the GTT selftests to take this
> into account.
>
> For DG2 we further align and pad lmem object GTT addresses
> to 2MB to ensure PDEs contain consistent page sizes as
> required by the HW.
>
> Signed-off-by: Matthew Auld <matthew.auld@xxxxxxxxx>
> Signed-off-by: Ramalingam C <ramalingam.c@xxxxxxxxx>
> Signed-off-by: Robert Beckett <bob.beckett@xxxxxxxxxxxxx>
> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
> Cc: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx>
> ---
> .../i915/gem/selftests/i915_gem_client_blt.c | 23 +++--
> drivers/gpu/drm/i915/gt/intel_gtt.c | 14 +++
> drivers/gpu/drm/i915/gt/intel_gtt.h | 9 ++
> drivers/gpu/drm/i915/i915_vma.c | 14 +++
> drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 96 ++++++++++++-------
> 5 files changed, 115 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
> index c08f766e6e15..7fee95a65414 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
> @@ -39,6 +39,7 @@ struct tiled_blits {
> struct blit_buffer scratch;
> struct i915_vma *batch;
> u64 hole;
> + u64 align;
> u32 width;
> u32 height;
> };
> @@ -410,14 +411,21 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng)
> goto err_free;
> }
>
> - hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4);
> + t->align = I915_GTT_PAGE_SIZE_2M; /* XXX worst case, derive from vm! */
> + t->align = max(t->align,
> + i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL));
> + t->align = max(t->align,
> + i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM));
> +
> + hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align);
> hole_size *= 2; /* room to maneuver */
> - hole_size += 2 * I915_GTT_MIN_ALIGNMENT;
> + hole_size += 2 * t->align; /* padding on either side */
>
> mutex_lock(&t->ce->vm->mutex);
> memset(&hole, 0, sizeof(hole));
> err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole,
> - hole_size, 0, I915_COLOR_UNEVICTABLE,
> + hole_size, t->align,
> + I915_COLOR_UNEVICTABLE,
> 0, U64_MAX,
> DRM_MM_INSERT_BEST);
> if (!err)
> @@ -428,7 +436,7 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng)
> goto err_put;
> }
>
> - t->hole = hole.start + I915_GTT_MIN_ALIGNMENT;
> + t->hole = hole.start + t->align;
> pr_info("Using hole at %llx\n", t->hole);
>
> err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng);
> @@ -455,7 +463,7 @@ static void tiled_blits_destroy(struct tiled_blits *t)
> static int tiled_blits_prepare(struct tiled_blits *t,
> struct rnd_state *prng)
> {
> - u64 offset = PAGE_ALIGN(t->width * t->height * 4);
> + u64 offset = round_up(t->width * t->height * 4, t->align);
> u32 *map;
> int err;
> int i;
> @@ -486,8 +494,7 @@ static int tiled_blits_prepare(struct tiled_blits *t,
>
> static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
> {
> - u64 offset =
> - round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT);
> + u64 offset = round_up(t->width * t->height * 4, 2 * t->align);
> int err;
>
> /* We want to check position invariant tiling across GTT eviction */
> @@ -500,7 +507,7 @@ static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
>
> /* Reposition so that we overlap the old addresses, and slightly off */
> err = tiled_blit(t,
> - &t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT,
> + &t->buffers[2], t->hole + t->align,
> &t->buffers[1], t->hole + 3 * offset / 2);
> if (err)
> return err;
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
> index 46be4197b93f..7c92b25c0f26 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
> @@ -223,6 +223,20 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
>
> GEM_BUG_ON(!vm->total);
> drm_mm_init(&vm->mm, 0, vm->total);
> +
> + memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
> + ARRAY_SIZE(vm->min_alignment));
> +
> + if (HAS_64K_PAGES(vm->i915)) {
> + if (IS_DG2(vm->i915)) {

I think we need this 2M alignment for all platforms with HAS_64K_PAGES, not only for DG2.
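Something along these lines maybe (untested sketch, only to illustrate the suggestion; it reuses the identifiers already in the patch, and the else branch with the 64K values would then go away):

        if (HAS_64K_PAGES(vm->i915)) {
                /* every 64K-pages platform wants the 2M alignment, not just DG2 */
                vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_2M;
                vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_2M;
        }
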
> + vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_2M;
> + vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_2M;
> + } else {
> + vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
> + vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
> + }
> + }
> +
> vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
>
> INIT_LIST_HEAD(&vm->bound_list);
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
> index 8073438b67c8..b8da2514d601 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> @@ -29,6 +29,8 @@
> #include "i915_selftest.h"
> #include "i915_vma_resource.h"
> #include "i915_vma_types.h"
> +#include "i915_params.h"
> +#include "intel_memory_region.h"
>
> #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
>
> @@ -223,6 +225,7 @@ struct i915_address_space {
> struct device *dma;
> u64 total; /* size addr space maps (ex. 2GB for ggtt) */
> u64 reserved; /* size addr space reserved */
> + u64 min_alignment[INTEL_MEMORY_STOLEN_LOCAL + 1];
>
> unsigned int bind_async_flags;
>
> @@ -384,6 +387,12 @@ i915_vm_has_scratch_64K(struct i915_address_space *vm)
> return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K);
> }
>
> +static inline u64 i915_vm_min_alignment(struct i915_address_space *vm,
> + enum intel_memory_type type)
> +{
> + return vm->min_alignment[type];
> +}
> +
> static inline bool
> i915_vm_has_cache_coloring(struct i915_address_space *vm)
> {
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 1f15c3298112..9ac92e7a3566 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -756,6 +756,20 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> }
>
> color = 0;
> +
> + if (HAS_64K_PAGES(vma->vm->i915) && i915_gem_object_is_lmem(vma->obj)) {
> + alignment = max(alignment, I915_GTT_PAGE_SIZE_64K);
> + /*
> + * DG2 can not have different sized pages in any given PDE (2MB range).
> + * Keeping things simple, we force any lmem object to reserve
> + * 2MB chunks, preventing any smaller pages being used alongside
> + */
> + if (IS_DG2(vma->vm->i915)) {

Similarly, here we don't need a special case for DG2.
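i.e. roughly like this (again only an untested sketch, using the same identifiers as above): with the 2M clamp applied on every 64K-pages platform, the initial 64K max() becomes redundant as well:

        if (HAS_64K_PAGES(vma->vm->i915) && i915_gem_object_is_lmem(vma->obj)) {
                /*
                 * Any lmem object on a 64K-pages platform reserves 2M chunks,
                 * so a PDE never mixes page sizes.
                 */
                alignment = max(alignment, I915_GTT_PAGE_SIZE_2M);
                size = round_up(size, I915_GTT_PAGE_SIZE_2M);
        }
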
Ram > + alignment = max(alignment, I915_GTT_PAGE_SIZE_2M); > + size = round_up(size, I915_GTT_PAGE_SIZE_2M); > + } > + } > + > if (i915_vm_has_cache_coloring(vma->vm)) > color = vma->obj->cache_level; > > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c > index 076d860ce01a..2f3f0c01786b 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c > @@ -238,6 +238,8 @@ static int lowlevel_hole(struct i915_address_space *vm, > u64 hole_start, u64 hole_end, > unsigned long end_time) > { > + const unsigned int min_alignment = > + i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM); > I915_RND_STATE(seed_prng); > struct i915_vma_resource *mock_vma_res; > unsigned int size; > @@ -251,9 +253,10 @@ static int lowlevel_hole(struct i915_address_space *vm, > I915_RND_SUBSTATE(prng, seed_prng); > struct drm_i915_gem_object *obj; > unsigned int *order, count, n; > - u64 hole_size; > + u64 hole_size, aligned_size; > > - hole_size = (hole_end - hole_start) >> size; > + aligned_size = max_t(u32, ilog2(min_alignment), size); > + hole_size = (hole_end - hole_start) >> aligned_size; > if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) > hole_size = KMALLOC_MAX_SIZE / sizeof(u32); > count = hole_size >> 1; > @@ -274,8 +277,8 @@ static int lowlevel_hole(struct i915_address_space *vm, > } > GEM_BUG_ON(!order); > > - GEM_BUG_ON(count * BIT_ULL(size) > vm->total); > - GEM_BUG_ON(hole_start + count * BIT_ULL(size) > hole_end); > + GEM_BUG_ON(count * BIT_ULL(aligned_size) > vm->total); > + GEM_BUG_ON(hole_start + count * BIT_ULL(aligned_size) > hole_end); > > /* Ignore allocation failures (i.e. don't report them as > * a test failure) as we are purposefully allocating very > @@ -298,10 +301,10 @@ static int lowlevel_hole(struct i915_address_space *vm, > } > > for (n = 0; n < count; n++) { > - u64 addr = hole_start + order[n] * BIT_ULL(size); > + u64 addr = hole_start + order[n] * BIT_ULL(aligned_size); > intel_wakeref_t wakeref; > > - GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); > + GEM_BUG_ON(addr + BIT_ULL(aligned_size) > vm->total); > > if (igt_timeout(end_time, > "%s timed out before %d/%d\n", > @@ -344,7 +347,7 @@ static int lowlevel_hole(struct i915_address_space *vm, > } > > mock_vma_res->bi.pages = obj->mm.pages; > - mock_vma_res->node_size = BIT_ULL(size); > + mock_vma_res->node_size = BIT_ULL(aligned_size); > mock_vma_res->start = addr; > > with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref) > @@ -355,7 +358,7 @@ static int lowlevel_hole(struct i915_address_space *vm, > > i915_random_reorder(order, count, &prng); > for (n = 0; n < count; n++) { > - u64 addr = hole_start + order[n] * BIT_ULL(size); > + u64 addr = hole_start + order[n] * BIT_ULL(aligned_size); > intel_wakeref_t wakeref; > > GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); > @@ -399,8 +402,10 @@ static int fill_hole(struct i915_address_space *vm, > { > const u64 hole_size = hole_end - hole_start; > struct drm_i915_gem_object *obj; > + const unsigned int min_alignment = > + i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM); > const unsigned long max_pages = > - min_t(u64, ULONG_MAX - 1, hole_size/2 >> PAGE_SHIFT); > + min_t(u64, ULONG_MAX - 1, (hole_size / 2) >> ilog2(min_alignment)); > const unsigned long max_step = max(int_sqrt(max_pages), 2UL); > unsigned long npages, prime, flags; > struct i915_vma *vma; > @@ -441,14 +446,17 @@ static int fill_hole(struct i915_address_space *vm, > > offset = p->offset; > list_for_each_entry(obj, &objects, 
st_link) { > + u64 aligned_size = round_up(obj->base.size, > + min_alignment); > + > vma = i915_vma_instance(obj, vm, NULL); > if (IS_ERR(vma)) > continue; > > if (p->step < 0) { > - if (offset < hole_start + obj->base.size) > + if (offset < hole_start + aligned_size) > break; > - offset -= obj->base.size; > + offset -= aligned_size; > } > > err = i915_vma_pin(vma, 0, 0, offset | flags); > @@ -470,22 +478,25 @@ static int fill_hole(struct i915_address_space *vm, > i915_vma_unpin(vma); > > if (p->step > 0) { > - if (offset + obj->base.size > hole_end) > + if (offset + aligned_size > hole_end) > break; > - offset += obj->base.size; > + offset += aligned_size; > } > } > > offset = p->offset; > list_for_each_entry(obj, &objects, st_link) { > + u64 aligned_size = round_up(obj->base.size, > + min_alignment); > + > vma = i915_vma_instance(obj, vm, NULL); > if (IS_ERR(vma)) > continue; > > if (p->step < 0) { > - if (offset < hole_start + obj->base.size) > + if (offset < hole_start + aligned_size) > break; > - offset -= obj->base.size; > + offset -= aligned_size; > } > > if (!drm_mm_node_allocated(&vma->node) || > @@ -506,22 +517,25 @@ static int fill_hole(struct i915_address_space *vm, > } > > if (p->step > 0) { > - if (offset + obj->base.size > hole_end) > + if (offset + aligned_size > hole_end) > break; > - offset += obj->base.size; > + offset += aligned_size; > } > } > > offset = p->offset; > list_for_each_entry_reverse(obj, &objects, st_link) { > + u64 aligned_size = round_up(obj->base.size, > + min_alignment); > + > vma = i915_vma_instance(obj, vm, NULL); > if (IS_ERR(vma)) > continue; > > if (p->step < 0) { > - if (offset < hole_start + obj->base.size) > + if (offset < hole_start + aligned_size) > break; > - offset -= obj->base.size; > + offset -= aligned_size; > } > > err = i915_vma_pin(vma, 0, 0, offset | flags); > @@ -543,22 +557,25 @@ static int fill_hole(struct i915_address_space *vm, > i915_vma_unpin(vma); > > if (p->step > 0) { > - if (offset + obj->base.size > hole_end) > + if (offset + aligned_size > hole_end) > break; > - offset += obj->base.size; > + offset += aligned_size; > } > } > > offset = p->offset; > list_for_each_entry_reverse(obj, &objects, st_link) { > + u64 aligned_size = round_up(obj->base.size, > + min_alignment); > + > vma = i915_vma_instance(obj, vm, NULL); > if (IS_ERR(vma)) > continue; > > if (p->step < 0) { > - if (offset < hole_start + obj->base.size) > + if (offset < hole_start + aligned_size) > break; > - offset -= obj->base.size; > + offset -= aligned_size; > } > > if (!drm_mm_node_allocated(&vma->node) || > @@ -579,9 +596,9 @@ static int fill_hole(struct i915_address_space *vm, > } > > if (p->step > 0) { > - if (offset + obj->base.size > hole_end) > + if (offset + aligned_size > hole_end) > break; > - offset += obj->base.size; > + offset += aligned_size; > } > } > } > @@ -611,6 +628,7 @@ static int walk_hole(struct i915_address_space *vm, > const u64 hole_size = hole_end - hole_start; > const unsigned long max_pages = > min_t(u64, ULONG_MAX - 1, hole_size >> PAGE_SHIFT); > + unsigned long min_alignment; > unsigned long flags; > u64 size; > > @@ -620,6 +638,8 @@ static int walk_hole(struct i915_address_space *vm, > if (i915_is_ggtt(vm)) > flags |= PIN_GLOBAL; > > + min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM); > + > for_each_prime_number_from(size, 1, max_pages) { > struct drm_i915_gem_object *obj; > struct i915_vma *vma; > @@ -638,7 +658,7 @@ static int walk_hole(struct i915_address_space *vm, > > for (addr = hole_start; > addr + 
obj->base.size < hole_end; > - addr += obj->base.size) { > + addr += round_up(obj->base.size, min_alignment)) { > err = i915_vma_pin(vma, 0, 0, addr | flags); > if (err) { > pr_err("%s bind failed at %llx + %llx [hole %llx- %llx] with err=%d\n", > @@ -690,6 +710,7 @@ static int pot_hole(struct i915_address_space *vm, > { > struct drm_i915_gem_object *obj; > struct i915_vma *vma; > + unsigned int min_alignment; > unsigned long flags; > unsigned int pot; > int err = 0; > @@ -698,6 +719,8 @@ static int pot_hole(struct i915_address_space *vm, > if (i915_is_ggtt(vm)) > flags |= PIN_GLOBAL; > > + min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM); > + > obj = i915_gem_object_create_internal(vm->i915, 2 * I915_GTT_PAGE_SIZE); > if (IS_ERR(obj)) > return PTR_ERR(obj); > @@ -710,13 +733,13 @@ static int pot_hole(struct i915_address_space *vm, > > /* Insert a pair of pages across every pot boundary within the hole */ > for (pot = fls64(hole_end - 1) - 1; > - pot > ilog2(2 * I915_GTT_PAGE_SIZE); > + pot > ilog2(2 * min_alignment); > pot--) { > u64 step = BIT_ULL(pot); > u64 addr; > > - for (addr = round_up(hole_start + I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE; > - addr <= round_down(hole_end - 2*I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE; > + for (addr = round_up(hole_start + min_alignment, step) - min_alignment; > + addr <= round_down(hole_end - (2 * min_alignment), step) - min_alignment; > addr += step) { > err = i915_vma_pin(vma, 0, 0, addr | flags); > if (err) { > @@ -761,6 +784,7 @@ static int drunk_hole(struct i915_address_space *vm, > unsigned long end_time) > { > I915_RND_STATE(prng); > + unsigned int min_alignment; > unsigned int size; > unsigned long flags; > > @@ -768,15 +792,18 @@ static int drunk_hole(struct i915_address_space *vm, > if (i915_is_ggtt(vm)) > flags |= PIN_GLOBAL; > > + min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM); > + > /* Keep creating larger objects until one cannot fit into the hole */ > for (size = 12; (hole_end - hole_start) >> size; size++) { > struct drm_i915_gem_object *obj; > unsigned int *order, count, n; > struct i915_vma *vma; > - u64 hole_size; > + u64 hole_size, aligned_size; > int err = -ENODEV; > > - hole_size = (hole_end - hole_start) >> size; > + aligned_size = max_t(u32, ilog2(min_alignment), size); > + hole_size = (hole_end - hole_start) >> aligned_size; > if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) > hole_size = KMALLOC_MAX_SIZE / sizeof(u32); > count = hole_size >> 1; > @@ -816,7 +843,7 @@ static int drunk_hole(struct i915_address_space *vm, > GEM_BUG_ON(vma->size != BIT_ULL(size)); > > for (n = 0; n < count; n++) { > - u64 addr = hole_start + order[n] * BIT_ULL(size); > + u64 addr = hole_start + order[n] * BIT_ULL(aligned_size); > > err = i915_vma_pin(vma, 0, 0, addr | flags); > if (err) { > @@ -868,11 +895,14 @@ static int __shrink_hole(struct i915_address_space *vm, > { > struct drm_i915_gem_object *obj; > unsigned long flags = PIN_OFFSET_FIXED | PIN_USER; > + unsigned int min_alignment; > unsigned int order = 12; > LIST_HEAD(objects); > int err = 0; > u64 addr; > > + min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM); > + > /* Keep creating larger objects until one cannot fit into the hole */ > for (addr = hole_start; addr < hole_end; ) { > struct i915_vma *vma; > @@ -913,7 +943,7 @@ static int __shrink_hole(struct i915_address_space *vm, > } > > i915_vma_unpin(vma); > - addr += size; > + addr += round_up(size, min_alignment); > > /* > * Since we are injecting allocation faults at random 
intervals,
> --
> 2.25.1
>