On 11 January 2017 at 21:09, Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote:
> Create partial mappings to cover a large object, investigating tiling
> (fenced regions) and VMA reuse.
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/selftests/i915_gem_object.c | 252 +++++++++++++++++++++++
>  1 file changed, 252 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> index 08e6b49b1e77..df3625f551aa 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> @@ -132,6 +132,257 @@ static int igt_gem_huge(void *arg)
>  	return err;
>  }
>
> +struct tile {
> +	unsigned int width;
> +	unsigned int height;
> +	unsigned int stride;
> +	unsigned int size;
> +	unsigned int tiling;
> +	unsigned int swizzle;
> +};
> +
> +static u64 swizzle_bit(unsigned bit, u64 offset)
> +{
> +	return (offset & BIT_ULL(bit)) >> (bit - 6);
> +}
> +
> +static u64 tiled_offset(const struct tile *tile, u64 v)
> +{
> +	u64 x, y;
> +
> +	if (tile->tiling == I915_TILING_NONE)
> +		return v;
> +
> +	switch (tile->swizzle) {
> +	case I915_BIT_6_SWIZZLE_9:
> +		v ^= swizzle_bit(9, v);
> +		break;
> +	case I915_BIT_6_SWIZZLE_9_10:
> +		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
> +		break;
> +	case I915_BIT_6_SWIZZLE_9_11:
> +		v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
> +		break;
> +	case I915_BIT_6_SWIZZLE_9_10_11:
> +		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
> +		break;
> +	}
> +
> +	y = div64_u64_rem(v, tile->stride, &x);
> +	v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height;
> +
> +	if (tile->tiling == I915_TILING_X) {
> +		v += y * tile->width;
> +		v += div64_u64_rem(x, tile->width, &x) << tile->size;
> +		v += x;
> +	} else {
> +		const unsigned int ytile_span = 16;
> +		const unsigned int ytile_height = 32 * ytile_span;
> +
> +		v += y * ytile_span;
> +		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
> +		v += x;
> +	}
> +
> +	return v;
> +}
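
The X-tiling arithmetic took me a moment, so a worked example for anyone
else reading along (my own numbers, not from the patch): take the X-tile
geometry the test uses for gen3+ (width=512, height=8, size=12, i.e.
4KiB tiles), assume stride=2048 and no swizzle, and feed in v=5000:

	y = 5000 / 2048 = 2, x = 5000 % 2048 = 904  /* row, byte within row */
	v = (2 / 8) * 2048 * 8 = 0                  /* start of tile row 0 */
	v += (2 % 8) * 512 = 1024                   /* line within the tile */
	v += (904 / 512) << 12 = 4096               /* skip one whole tile */
	v += 904 % 512 = 392                        /* byte within the line */

so linear byte 5000 lands at byte 5512 of the tiled layout.
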
> +
> +static int check_partial_mapping(struct drm_i915_gem_object *obj,
> +				 const struct tile *tile)
> +{
> +	const unsigned int nreal = obj->scratch / PAGE_SIZE;
> +	const unsigned long npages = obj->base.size / PAGE_SIZE;
> +	struct i915_vma *vma;
> +	unsigned long page;
> +	int err;
> +
> +	cond_resched();
> +	if (signal_pending(current))
> +		return -EINTR;
> +
> +	err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
> +	if (err)
> +		return err;
> +
> +	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
> +	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
> +
> +	for_each_prime_number_from(page, 1, npages) {
> +		struct i915_ggtt_view view =
> +			compute_partial_view(obj, page, MIN_CHUNK_PAGES);
> +		u32 __iomem *io;
> +		struct page *p;
> +		unsigned int n;
> +		u64 offset;
> +		u32 *cpu;
> +
> +		GEM_BUG_ON(intel_partial_get_page_count(&view.partial) > nreal);
> +
> +		err = i915_gem_object_set_to_gtt_domain(obj, true);
> +		if (err)
> +			return err;
> +
> +		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
> +		if (IS_ERR(vma)) {
> +			pr_err("Failed to pin partial view: offset=%lu\n",
> +			       page);
> +			return PTR_ERR(vma);
> +		}
> +
> +		n = page - intel_partial_get_page_offset(&view.partial);
> +		GEM_BUG_ON(n >= intel_partial_get_page_count(&view.partial));
> +
> +		io = i915_vma_pin_iomap(vma);
> +		i915_vma_unpin(vma);
> +		if (IS_ERR(io)) {
> +			pr_err("Failed to iomap partial view: offset=%lu\n",
> +			       page);
> +			return PTR_ERR(io);
> +		}
> +
> +		err = i915_vma_get_fence(vma);
> +		if (err) {
> +			pr_err("Failed to get fence for partial view: offset=%lu\n",
> +			       page);
> +			i915_vma_unpin_iomap(vma);
> +			return PTR_ERR(io);

An odd error code ;)
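
Presumably that was meant to be just:

	return err;

since io is a valid mapping at this point, so PTR_ERR(io) would return
its address rather than an errno.
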
> +		}
> +
> +		iowrite32(page, io + n * PAGE_SIZE/sizeof(*io));
> +		i915_vma_unpin_iomap(vma);
> +
> +		offset = tiled_offset(tile, page << PAGE_SHIFT);
> +		if (offset >= obj->base.size)
> +			continue;
> +
> +		i915_gem_object_flush_gtt_write_domain(obj);
> +
> +		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
> +		cpu = kmap(p) + offset_in_page(offset);
> +		drm_clflush_virt_range(cpu, sizeof(*cpu));
> +		if (*cpu != (u32)page) {
> +			pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
> +			       page, n,
> +			       intel_partial_get_page_offset(&view.partial),
> +			       intel_partial_get_page_count(&view.partial),
> +			       vma->size >> PAGE_SHIFT,
> +			       tile_row_pages(obj),
> +			       vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
> +			       offset >> PAGE_SHIFT,
> +			       (unsigned int)offset_in_page(offset),
> +			       offset,
> +			       (u32)page, *cpu);
> +			err = -EINVAL;
> +		}
> +		*cpu = 0;
> +		drm_clflush_virt_range(cpu, sizeof(*cpu));
> +		kunmap(p);
> +		if (err)
> +			return err;
> +	}
> +
> +	return 0;
> +}
> +
> +static int igt_partial_tiling(void *arg)
> +{
> +	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
> +	struct drm_i915_private *i915 = arg;
> +	struct drm_i915_gem_object *obj;
> +	struct tile tile;
> +	int err;
> +
> +	obj = huge_gem_object(i915,
> +			      nreal << PAGE_SHIFT,
> +			      (1 + next_prime_number(i915->ggtt.base.total >> PAGE_SHIFT)) << PAGE_SHIFT);
> +	if (IS_ERR(obj))
> +		return PTR_ERR(obj);
> +
> +	err = i915_gem_object_pin_pages(obj);
> +	if (err) {
> +		pr_err("Failed to allocate %u pages (%zu total), err=%d\n",
> +		       nreal, obj->base.size / PAGE_SIZE, err);
> +		goto err;
> +	}
> +
> +	tile.height = 1;
> +	tile.width = 1;
> +	tile.size = 0;
> +	tile.stride = 0;
> +	tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
> +	tile.tiling = I915_TILING_NONE;
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +	err = check_partial_mapping(obj, &tile);
> +	if (err)
> +		goto err_unlock;
> +
> +	for (tile.tiling = I915_TILING_X;
> +	     tile.tiling <= I915_TILING_Y;
> +	     tile.tiling++) {
> +		unsigned int max_pitch;
> +		unsigned int pitch;
> +
> +		switch (tile.tiling) {
> +		case I915_TILING_X:
> +			tile.swizzle = i915->mm.bit_6_swizzle_x;
> +			break;
> +		case I915_TILING_Y:
> +			tile.swizzle = i915->mm.bit_6_swizzle_y;
> +			break;
> +		}
> +
> +		if (tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN ||
> +		    tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
> +			continue;
> +
> +		if (INTEL_GEN(i915) <= 2) {
> +			tile.height = 16;
> +			tile.width = 128;
> +			tile.size = 11;
> +		} else if (tile.tiling == I915_TILING_Y &&
> +			   HAS_128_BYTE_Y_TILING(i915)) {
> +			tile.height = 32;
> +			tile.width = 128;
> +			tile.size = 12;
> +		} else {
> +			tile.height = 8;
> +			tile.width = 512;
> +			tile.size = 12;
> +		}
> +
> +		if (INTEL_GEN(i915) < 4)
> +			max_pitch = 8192 / tile.width;
> +		else if (INTEL_GEN(i915) < 7)
> +			max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width;
> +		else
> +			max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width;
> +
> +		for (pitch = 1; pitch <= max_pitch; pitch <<= 1) {
> +			tile.stride = tile.width * pitch;
> +			err = check_partial_mapping(obj, &tile);
> +			if (err)
> +				goto err_unlock;
> +		}
> +
> +		if (INTEL_GEN(i915) >= 4) {

Why do we need this restriction, not enough primes?

Otherwise seems reasonable:
Reviewed-by: Matthew Auld <matthew.auld@xxxxxxxxx>