Quoting Matthew Auld (2017-07-03 15:14:58)
> +static struct i915_vma *
> +gpu_write_dw(struct i915_vma *vma, u64 offset, u32 value)
> +{
> +	struct drm_i915_private *i915 = to_i915(vma->obj->base.dev);
> +	struct drm_i915_gem_object *obj;
> +	struct i915_vma *batch;
> +	unsigned int size;
> +	u32 *cmd;
> +	int err;
> +
> +	GEM_BUG_ON(INTEL_GEN(i915) < 8);

Didn't you enable large pages for snb and earlier?

> +static int gpu_write(struct i915_vma *vma,
> +		     struct i915_gem_context *ctx,
> +		     unsigned long offset,
> +		     u32 value)
> +{
> +	struct drm_i915_private *i915 = to_i915(vma->obj->base.dev);
> +	struct drm_i915_gem_request *rq;
> +	struct i915_vma *batch;
> +	int flags = 0;
> +	int err;
> +
> +	batch = gpu_write_dw(vma, offset, value);
> +	if (IS_ERR(batch))
> +		return PTR_ERR(batch);
> +
> +	rq = i915_gem_request_alloc(i915->engine[RCS], ctx);
> +	if (IS_ERR(rq)) {
> +		err = PTR_ERR(rq);
> +		goto err_batch;
> +	}
> +
> +	err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
> +	if (err)
> +		goto err_request;
> +
> +	err = i915_switch_context(rq);
> +	if (err)
> +		goto err_request;
> +
> +	err = rq->engine->emit_bb_start(rq,
> +					batch->node.start, batch->node.size,
> +					flags);
> +	if (err)
> +		goto err_request;
> +
> +	i915_vma_move_to_active(batch, rq, 0);
> +	i915_gem_object_set_active_reference(batch->obj);
> +	i915_vma_unpin(batch);
> +	i915_vma_close(batch);
> +
> +	i915_vma_move_to_active(vma, rq, 0);
> +
> +	reservation_object_lock(vma->obj->resv, NULL);

vma->resv

> +	reservation_object_add_excl_fence(vma->obj->resv, &rq->fence);
> +	reservation_object_unlock(vma->obj->resv);
> +
> +	__i915_add_request(rq, true);
> +
> +	return 0;
> +
> +err_request:
> +	__i915_add_request(rq, false);
> +err_batch:
> +	i915_vma_unpin(batch);

Leaking batch. Just tie the lifetime of the batch to the request
immediately after allocating the request.

> +
> +	return err;
> +}
> +
> +static inline int igt_can_allocate_thp(struct drm_i915_private *i915)
> +{
> +	return HAS_PAGE_SIZE(i915, I915_GTT_PAGE_SIZE_2M) &&
> +	       has_transparent_hugepage();
> +}
> +
> +static int igt_ppgtt_write_huge(void *arg)
> +{
> +	struct i915_hw_ppgtt *ppgtt = arg;
> +	struct drm_i915_private *i915 = ppgtt->base.i915;
> +	struct drm_i915_gem_object *obj;
> +	const unsigned int page_sizes[] = {
> +		I915_GTT_PAGE_SIZE_64K,
> +		I915_GTT_PAGE_SIZE_2M,
> +	};
> +	struct i915_vma *vma;
> +	int err = 0;
> +	int i;
> +
> +	if (!igt_can_allocate_thp(i915))
> +		return 0;
> +
> +	/* Sanity check that the HW uses huge-pages correctly */
> +
> +	ppgtt = i915->kernel_context->ppgtt;
> +
> +	obj = i915_gem_object_create(i915, I915_GTT_PAGE_SIZE_2M);
> +	if (IS_ERR(obj))
> +		return PTR_ERR(obj);

2M is MAX_ORDER, so you don't need thp for this and can just use
i915_gem_object_create_internal() (or both: once to test without relying
on thp, perhaps more common, then once to test thp shmemfs).

> +	err = i915_gem_object_pin_pages(obj);
> +	if (err)
> +		goto out_put;
> +
> +	GEM_BUG_ON(!obj->mm.page_sizes.sg);
> +
> +	if (obj->mm.page_sizes.sg < I915_GTT_PAGE_SIZE_2M) {
> +		pr_err("Failed to allocate huge pages\n");
> +		err = -EINVAL;

Failure? Or just unable to test? Nothing forces the kernel to give you
contiguous space.
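Something along these lines would be enough (rough sketch, untested; the
point is just to skip quietly rather than fail when the allocator didn't
hand back contiguous memory):

	if (obj->mm.page_sizes.sg < I915_GTT_PAGE_SIZE_2M) {
		pr_info("huge pages not allocated, skipping\n");
		goto out_unpin; /* err is still 0, nothing to test */
	}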
> +		goto out_unpin;
> +	}
> +
> +	vma = i915_vma_instance(obj, &ppgtt->base, NULL);
> +	if (IS_ERR(vma)) {
> +		err = PTR_ERR(vma);
> +		goto out_unpin;
> +	}
> +
> +	for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
> +		unsigned int page_size = page_sizes[i];
> +		u32 *map;
> +
> +		if (!HAS_PAGE_SIZE(i915, page_size))
> +			continue;
> +
> +		/* Force the page size */
> +		obj->mm.page_sizes.sg = page_size;
> +
> +		err = i915_gem_object_set_to_gtt_domain(obj, true);
> +		if (err)
> +			goto out_close;

Gets in the way of the important details; make it coherent and get rid of
this.

> +		err = i915_vma_pin(vma, 0, 0, PIN_USER);
> +		if (err)
> +			goto out_close;

Move the unbind above this:

	/* Force the page size */
	i915_vma_unbind(vma);
	obj->mm.page_sizes.sg = page_size;
	i915_vma_pin(vma);
	GEM_BUG_ON(vma->page_sizes.sg != page_size);

> +
> +		GEM_BUG_ON(obj->mm.page_sizes.gtt);
> +		GEM_BUG_ON(vma->page_sizes.sg != page_size);
> +		GEM_BUG_ON(!vma->page_sizes.phys);
> +
> +		GEM_BUG_ON(vma->page_sizes.gtt != page_size);
> +		GEM_BUG_ON(!IS_ALIGNED(vma->node.start, I915_GTT_PAGE_SIZE_2M));
> +		GEM_BUG_ON(!IS_ALIGNED(vma->node.size, I915_GTT_PAGE_SIZE_2M));
> +
> +		err = gpu_write(vma, i915->kernel_context, 0, page_size);
> +		if (err) {
> +			i915_vma_unpin(vma);
> +			goto out_close;
> +		}
> +
> +		err = i915_gem_object_set_to_cpu_domain(obj, false);
> +		if (err) {
> +			i915_vma_unpin(vma);
> +			goto out_close;
> +		}

Again, this is getting in the way of the important details.

> +		i915_vma_unpin(vma);
> +		err = i915_vma_unbind(vma);
> +		if (err)
> +			goto out_close;
> +
> +		map = i915_gem_object_pin_map(obj, I915_MAP_WB);
> +		if (IS_ERR(map)) {
> +			err = PTR_ERR(map);
> +			goto out_close;
> +		}

Do we need to test different PTE bits?

If you don't make this coherent, use

	map = pin_map(obj);
	set_to_cpu_domain(obj);
	for_each_page()
		test
	set_to_gtt_domain(obj);
	unpin_map(obj);

> +
> +		GEM_BUG_ON(map[0] != page_size);

This is the essential test being performed. Check it, and report an error
properly. Check every page at different offsets!

> +
> +		i915_gem_object_unpin_map(obj);
> +	}
> +
> +out_close:
> +	i915_vma_close(vma);
> +out_unpin:
> +	i915_gem_object_unpin_pages(obj);
> +out_put:
> +	i915_gem_object_put(obj);
> +
> +	return err;
> +}
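For the readback, roughly what I have in mind (untested sketch, error
handling trimmed; assumes gpu_write() is first taught to stamp the value
into every page rather than just the first dword, n is an extra unsigned
int, and varying the offset checked within each page is a trivial
extension of the loop below):

	map = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto out_close;
	}

	err = i915_gem_object_set_to_cpu_domain(obj, false);
	if (err)
		goto out_unmap;

	/* Check a dword in every page, not just map[0] */
	for (n = 0; n < obj->base.size >> PAGE_SHIFT; n++) {
		u32 val = map[n * PAGE_SIZE / sizeof(*map)];

		if (val != page_size) {
			pr_err("page %u: found %x, expected %x\n",
			       n, val, page_size);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_set_to_gtt_domain(obj, false);
out_unmap:
	i915_gem_object_unpin_map(obj);
	if (err)
		goto out_close;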