The plan is to use the blitter engine for async object clearing when
using local memory, but before we can move the worker to get_pages() we
first have to tame some more of our struct_mutex usage. With this in
mind we should be able to upstream the object clearing as some
selftests, which should serve as a guinea pig for the ongoing locking
rework and the upcoming async get_pages() framework.

Signed-off-by: Matthew Auld <matthew.auld@xxxxxxxxx>
---
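For reviewers, a rough sketch of the intended call pattern for the new
async API; it mirrors igt_client_fill() below, clear_object_example()
is a made-up name, and the obj/ce setup plus full error handling are
elided:

    /* Illustrative only; not part of this patch. */
    static int clear_object_example(struct drm_i915_gem_object *obj,
    				    struct intel_context *ce)
    {
    	int err;

    	/* Queue the fill; the blitter clears the backing pages async. */
    	err = i915_gem_schedule_fill_pages_blt(obj, ce, obj->mm.pages,
    					       obj->mm.page_sizes.phys,
    					       obj->base.size, 0);
    	if (err)
    		return err;

    	/* The clear runs async; wait on the object before CPU access. */
    	return i915_gem_object_wait(obj,
    				    I915_WAIT_INTERRUPTIBLE |
    				    I915_WAIT_ALL,
    				    MAX_SCHEDULE_TIMEOUT);
    }
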
 drivers/gpu/drm/i915/Makefile                 |   2 +
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |   1 +
 drivers/gpu/drm/i915/i915_drv.h               |   3 +
 drivers/gpu/drm/i915/i915_gem.c               |   2 +-
 drivers/gpu/drm/i915/i915_gem_client_blt.c    | 186 ++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_client_blt.h    |  21 ++
 drivers/gpu/drm/i915/i915_gem_object_blt.c    |  95 +++++++++
 drivers/gpu/drm/i915/i915_gem_object_blt.h    |  23 +++
 .../drm/i915/selftests/i915_gem_client_blt.c  | 125 ++++++++++++
 .../drm/i915/selftests/i915_gem_object_blt.c  | 113 +++++++++++
 .../drm/i915/selftests/i915_live_selftests.h  |   2 +
 11 files changed, 572 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_client_blt.c
 create mode 100644 drivers/gpu/drm/i915/i915_gem_client_blt.h
 create mode 100644 drivers/gpu/drm/i915/i915_gem_object_blt.c
 create mode 100644 drivers/gpu/drm/i915/i915_gem_object_blt.h
 create mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_client_blt.c
 create mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_object_blt.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 58643373495c..fc123221304e 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -90,6 +90,7 @@ i915-y += \
	  i915_cmd_parser.o \
	  i915_gem_batch_pool.o \
	  i915_gem_clflush.o \
+	  i915_gem_client_blt.o \
	  i915_gem_context.o \
	  i915_gem_dmabuf.o \
	  i915_gem_evict.o \
@@ -99,6 +100,7 @@ i915-y += \
	  i915_gem_internal.o \
	  i915_gem.o \
	  i915_gem_object.o \
+	  i915_gem_object_blt.o \
	  i915_gem_pm.o \
	  i915_gem_render_state.o \
	  i915_gem_shrinker.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index a34ece53a771..7e95827b0726 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -180,6 +180,7 @@
 #define GFX_OP_DRAWRECT_INFO_I965  ((0x7900<<16)|0x2)
 
 #define COLOR_BLT_CMD			(2<<29 | 0x40<<22 | (5-2))
+#define XY_COLOR_BLT_CMD		(2<<29 | 0x50<<22)
 #define SRC_COPY_BLT_CMD		((2<<29)|(0x43<<22)|4)
 #define XY_SRC_COPY_BLT_CMD		((2<<29)|(0x53<<22)|6)
 #define XY_MONO_SRC_COPY_IMM_BLT	((2<<29)|(0x71<<22)|5)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1cea98f8b85c..61f95391edf0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2945,6 +2945,9 @@ dma_addr_t
 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
				unsigned long n);
 
+struct sg_table *
+__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj);
+
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
				 struct sg_table *pages,
				 unsigned int sg_page_sizes);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4c1793b1012e..65b6d7b7b624 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2121,7 +2121,7 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
	rcu_read_unlock();
 }
 
-static struct sg_table *
+struct sg_table *
 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 {
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_client_blt.c b/drivers/gpu/drm/i915/i915_gem_client_blt.c
new file mode 100644
index 000000000000..7fd977d54b57
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_client_blt.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+#include "i915_gem_client_blt.h"
+
+#include "i915_gem_object_blt.h"
+#include "intel_drv.h"
+
+static struct drm_i915_gem_object *
+create_sleeve(struct drm_i915_private *i915,
+	      struct sg_table *pages,
+	      unsigned int page_sizes,
+	      u64 size)
+{
+	struct drm_i915_gem_object *sleeve;
+
+	/* XXX: sketchy af */
+	sleeve = i915_gem_object_create_internal(i915, size);
+	if (IS_ERR(sleeve))
+		return sleeve;
+
+	mutex_lock(&sleeve->mm.lock);
+
+	atomic_inc(&sleeve->mm.pages_pin_count);
+	__i915_gem_object_set_pages(sleeve, pages, page_sizes);
+
+	mutex_unlock(&sleeve->mm.lock);
+
+	return sleeve;
+}
+
+static void destroy_sleeve(struct drm_i915_gem_object *sleeve)
+{
+	mutex_lock(&sleeve->mm.lock);
+
+	__i915_gem_object_unset_pages(sleeve);
+	atomic_dec(&sleeve->mm.pages_pin_count);
+
+	mutex_unlock(&sleeve->mm.lock);
+
+	i915_gem_object_put(sleeve);
+}
+
+struct clear_pages_work {
+	struct dma_fence dma;
+	struct i915_sw_fence wait;
+	struct work_struct work;
+	struct drm_i915_gem_object *sleeve;
+	struct drm_i915_gem_object *obj;
+	struct intel_context *ce;
+	u32 value;
+};
+
+static const char *clear_pages_work_driver_name(struct dma_fence *fence)
+{
+	return DRIVER_NAME;
+}
+
+static const char *clear_pages_work_timeline_name(struct dma_fence *fence)
+{
+	return "clear";
+}
+
+static void clear_pages_work_release(struct dma_fence *fence)
+{
+	struct clear_pages_work *w = container_of(fence, typeof(*w), dma);
+
+	i915_sw_fence_fini(&w->wait);
+
+	BUILD_BUG_ON(offsetof(typeof(*w), dma));
+	dma_fence_free(&w->dma);
+}
+
+static const struct dma_fence_ops clear_pages_work_ops = {
+	.get_driver_name = clear_pages_work_driver_name,
+	.get_timeline_name = clear_pages_work_timeline_name,
+	.release = clear_pages_work_release,
+};
+
+/* XXX: needs to be taken out and shot */
+static void i915_clear_pages_worker(struct work_struct *work)
+{
+	struct clear_pages_work *w = container_of(work, typeof(*w), work);
+	struct drm_i915_private *i915 = w->ce->gem_context->i915;
+	intel_wakeref_t wakeref;
+	int err;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	wakeref = intel_runtime_pm_get(i915);
+
+	err = i915_gem_object_fill_blt(w->ce, w->sleeve, w->value);
+	if (unlikely(err))
+		dma_fence_set_error(&w->dma, err);
+
+	err = i915_gem_object_unbind(w->sleeve);
+	if (unlikely(err))
+		dma_fence_set_error(&w->dma, err);
+
+	intel_runtime_pm_put(i915, wakeref);
+
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	i915_gem_object_put(w->obj);
+
+	destroy_sleeve(w->sleeve);
+
+	dma_fence_signal(&w->dma);
+	dma_fence_put(&w->dma);
+}
+
+static int __i915_sw_fence_call
+clear_pages_work_notify(struct i915_sw_fence *fence,
+			enum i915_sw_fence_notify state)
+{
+	struct clear_pages_work *w = container_of(fence, typeof(*w), wait);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		schedule_work(&w->work);
+		break;
+
+	case FENCE_FREE:
+		dma_fence_put(&w->dma);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static DEFINE_SPINLOCK(fence_lock);
+
+int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
+				     struct intel_context *ce,
+				     struct sg_table *pages,
+				     unsigned int page_sizes,
+				     u64 size,
+				     u32 value)
+{
+	struct drm_i915_gem_object *sleeve;
+	struct clear_pages_work *work;
+
+	sleeve = create_sleeve(ce->gem_context->i915, pages, page_sizes, size);
+	if (IS_ERR(sleeve))
+		return PTR_ERR(sleeve);
+
+	work = kmalloc(sizeof(*work), GFP_KERNEL);
+	if (work == NULL) {
+		destroy_sleeve(sleeve);
+		return -ENOMEM;
+	}
+
+	work->value = value;
+	work->sleeve = sleeve;
+	work->obj = i915_gem_object_get(obj);
+	work->ce = ce;
+
+	INIT_WORK(&work->work, i915_clear_pages_worker);
+
+	dma_fence_init(&work->dma,
+		       &clear_pages_work_ops,
+		       &fence_lock,
+		       to_i915(obj->base.dev)->mm.unordered_timeline,
+		       0);
+	i915_sw_fence_init(&work->wait, clear_pages_work_notify);
+
+	dma_fence_get(&work->dma);
+
+	i915_sw_fence_await_reservation(&work->wait,
+					obj->resv, NULL,
+					true, I915_FENCE_TIMEOUT,
+					I915_FENCE_GFP);
+
+	i915_gem_object_lock(obj);
+	reservation_object_add_excl_fence(obj->resv, &work->dma);
+	i915_gem_object_unlock(obj);
+
+	i915_sw_fence_commit(&work->wait);
+
+	return 0;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_client_blt.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_client_blt.h b/drivers/gpu/drm/i915/i915_gem_client_blt.h
new file mode 100644
index 000000000000..8a0699648b02
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_client_blt.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+#ifndef __I915_GEM_CLIENT_BLT_H__
+#define __I915_GEM_CLIENT_BLT_H__
+
+#include <linux/types.h>
+
+struct drm_i915_gem_object;
+struct intel_context;
+struct sg_table;
+
+int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
+				     struct intel_context *ce,
+				     struct sg_table *pages,
+				     unsigned int page_sizes,
+				     u64 size,
+				     u32 value);
+
+#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_object_blt.c b/drivers/gpu/drm/i915/i915_gem_object_blt.c
new file mode 100644
index 000000000000..87a6e0e3709e
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_object_blt.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gem_object_blt.h"
+
+#include "intel_drv.h"
+
+int i915_vma_fill_blt(struct intel_context *ce,
+		      struct i915_vma *vma,
+		      u32 value)
+{
+	struct drm_i915_private *i915 = vma->vm->i915;
+	struct i915_request *rq;
+	u32 *cs;
+	int err;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	GEM_BUG_ON(!intel_context_is_pinned(ce));
+
+	rq = i915_request_create(ce);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	cs = intel_ring_begin(rq, 8);
+	if (IS_ERR(cs)) {
+		i915_request_add(rq);
+		return PTR_ERR(cs);
+	}
+
+	if (INTEL_GEN(i915) >= 8) {
+		*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7-2);
+		*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
+		*cs++ = 0;
+		*cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+		*cs++ = lower_32_bits(vma->node.start);
+		*cs++ = upper_32_bits(vma->node.start);
+		*cs++ = value;
+		*cs++ = MI_NOOP;
+	} else {
+		*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6-2);
+		*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
+		*cs++ = 0;
+		*cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+		*cs++ = vma->node.start;
+		*cs++ = value;
+		*cs++ = MI_NOOP;
+		*cs++ = MI_NOOP;
+	}
+
+	intel_ring_advance(rq, cs);
+
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	if (err)
+		i915_request_skip(rq, err);
+
+	i915_request_add(rq);
+
+	return err;
+}
+
+int i915_gem_object_fill_blt(struct intel_context *ce,
+			     struct drm_i915_gem_object *obj,
+			     u32 value)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_gem_context *ctx = ce->gem_context;
+	struct i915_address_space *vm;
+	struct i915_vma *vma;
+	int err;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	vm = ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (unlikely(err))
+		return err;
+
+	err = i915_vma_fill_blt(ce, vma, value);
+
+	i915_vma_unpin(vma);
+
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_object_blt.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_object_blt.h b/drivers/gpu/drm/i915/i915_gem_object_blt.h
new file mode 100644
index 000000000000..2b25a164d23b
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_object_blt.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_OBJECT_BLT_H__
+#define __I915_GEM_OBJECT_BLT_H__
+
+#include <linux/types.h>
+
+struct drm_i915_gem_object;
+struct intel_context;
+struct i915_vma;
+
+int i915_vma_fill_blt(struct intel_context *ce,
+		      struct i915_vma *vma,
+		      u32 value);
+
+int i915_gem_object_fill_blt(struct intel_context *ce,
+			     struct drm_i915_gem_object *obj,
+			     u32 value);
+
+#endif
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/selftests/i915_gem_client_blt.c
new file mode 100644
index 000000000000..3cd7d208a4a8
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_client_blt.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "../i915_selftest.h"
+
+#include "mock_drm.h"
+#include "mock_context.h"
+
+static int igt_client_fill(void *arg)
+{
+	struct intel_context *ce = arg;
+	struct drm_i915_private *i915 = ce->gem_context->i915;
+	struct drm_i915_gem_object *obj;
+	struct rnd_state prng;
+	IGT_TIMEOUT(end);
+	u32 *vaddr;
+	int err = 0;
+
+	obj = i915_gem_object_create_internal(i915, SZ_2M);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(vaddr)) {
+		err = PTR_ERR(vaddr);
+		goto err_put;
+	}
+
+	prandom_seed_state(&prng, i915_selftest.random_seed);
+
+	do {
+		u32 val = prandom_u32_state(&prng);
+		u32 i;
+
+		err = i915_gem_schedule_fill_pages_blt(obj, ce, obj->mm.pages,
+						       obj->mm.page_sizes.phys,
+						       obj->base.size,
+						       val);
+		if (err)
+			break;
+
+		/*
+		 * XXX: For now do the wait without the BKL to ensure we don't
+		 * deadlock.
+		 */
+		err = i915_gem_object_wait(obj,
+					   I915_WAIT_INTERRUPTIBLE |
+					   I915_WAIT_ALL,
+					   MAX_SCHEDULE_TIMEOUT);
+		if (err)
+			break;
+
+		mutex_lock(&i915->drm.struct_mutex);
+		err = i915_gem_object_set_to_cpu_domain(obj, false);
+		mutex_unlock(&i915->drm.struct_mutex);
+		if (err)
+			break;
+
+		for (i = 0; i < obj->base.size / sizeof(u32); ++i) {
+			if (vaddr[i] != val) {
+				pr_err("vaddr[%d]=%u, expected=%u\n", i,
+				       vaddr[i], val);
+				err = -EINVAL;
+				break;
+			}
+		}
+	} while (!time_after(jiffies, end));
+
+	i915_gem_object_unpin_map(obj);
+err_put:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_client_fill),
+	};
+	struct drm_file *file;
+	struct i915_gem_context *ctx;
+	struct intel_context *ce;
+	intel_wakeref_t wakeref;
+	int err;
+
+	if (i915_terminally_wedged(i915))
+		return 0;
+
+	if (!HAS_ENGINE(i915, BCS0))
+		return 0;
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	ctx = live_context(i915, file);
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out_free;
+	}
+
+	ce = i915_gem_context_get_engine(ctx, BCS0);
+	if (IS_ERR(ce)) {
+		err = PTR_ERR(ce);
+		goto out_free;
+	}
+
+	err = intel_context_pin(ce);
+	intel_context_put(ce);
+	if (err)
+		goto out_free;
+
+	wakeref = intel_runtime_pm_get(i915);
+	err = i915_subtests(tests, ce);
+	intel_runtime_pm_put(i915, wakeref);
+
+	intel_context_unpin(ce);
+out_free:
+	mock_file_free(i915, file);
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/selftests/i915_gem_object_blt.c
new file mode 100644
index 000000000000..0b619fad8bb7
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object_blt.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "../i915_selftest.h"
+
+#include "mock_drm.h"
+#include "mock_context.h"
+
+static int igt_fill_blt(void *arg)
+{
+	struct intel_context *ce = arg;
+	struct drm_i915_private *i915 = ce->gem_context->i915;
+	struct drm_i915_gem_object *obj;
+	struct rnd_state prng;
+	IGT_TIMEOUT(end);
+	u32 *vaddr;
+	int err = 0;
+
+	obj = i915_gem_object_create_internal(i915, SZ_2M);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(vaddr)) {
+		err = PTR_ERR(vaddr);
+		goto out_put;
+	}
+
+	prandom_seed_state(&prng, i915_selftest.random_seed);
+
+	do {
+		u32 val = prandom_u32_state(&prng);
+		u32 i;
+
+		mutex_lock(&i915->drm.struct_mutex);
+		err = i915_gem_object_fill_blt(ce, obj, val);
+		mutex_unlock(&i915->drm.struct_mutex);
+		if (err)
+			break;
+
+		mutex_lock(&i915->drm.struct_mutex);
+		err = i915_gem_object_set_to_cpu_domain(obj, false);
+		mutex_unlock(&i915->drm.struct_mutex);
+		if (err)
+			break;
+
+		for (i = 0; i < obj->base.size / sizeof(u32); ++i) {
+			if (vaddr[i] != val) {
+				pr_err("vaddr[%d]=%u, expected=%u\n", i,
+				       vaddr[i], val);
+				err = -EINVAL;
+				break;
+			}
+		}
+	} while (!time_after(jiffies, end));
+
+	i915_gem_object_unpin_map(obj);
+out_put:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_fill_blt),
+	};
+	struct drm_file *file;
+	struct i915_gem_context *ctx;
+	struct intel_context *ce;
+	intel_wakeref_t wakeref;
+	int err;
+
+	if (i915_terminally_wedged(i915))
+		return 0;
+
+	if (!HAS_ENGINE(i915, BCS0))
+		return 0;
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	ctx = live_context(i915, file);
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out_free;
+	}
+
+	ce = i915_gem_context_get_engine(ctx, BCS0);
+	if (IS_ERR(ce)) {
+		err = PTR_ERR(ce);
+		goto out_free;
+	}
+
+	err = intel_context_pin(ce);
+	intel_context_put(ce);
+	if (err)
+		goto out_free;
+
+	wakeref = intel_runtime_pm_get(i915);
+	err = i915_subtests(tests, ce);
+	intel_runtime_pm_put(i915, wakeref);
+
+	intel_context_unpin(ce);
+out_free:
+	mock_file_free(i915, file);
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index 6d766925ad04..3797574e1984 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -23,6 +23,8 @@ selftest(gem, i915_gem_live_selftests)
 selftest(evict, i915_gem_evict_live_selftests)
 selftest(hugepages, i915_gem_huge_page_live_selftests)
 selftest(contexts, i915_gem_context_live_selftests)
+selftest(blt, i915_gem_object_blt_live_selftests)
+selftest(client, i915_gem_client_blt_live_selftests)
 selftest(hangcheck, intel_hangcheck_live_selftests)
 selftest(execlists, intel_execlists_live_selftests)
 selftest(guc, intel_guc_live_selftest)
-- 
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx