On Tue, 2019-05-28 at 20:57 +0100, Matthew Auld wrote:
> The plan is to use the blitter engine for async object clearing when
> using local memory, but before we can move the worker to get_pages()
> we have to first tame some more of our struct_mutex usage. With this
> in mind we should be able to upstream the object clearing as some
> selftests, which should serve as a guinea pig for the ongoing locking
> rework and upcoming async get_pages() framework.

Matt, I have looked at this patch, and I am wondering whether we can make
the schedule/worker interface more generic, so that it can drive various
async tasks: clearing object pages, swapping object pages, migrating
object pages, and so on. Even get_pages() itself could run asynchronously.
Thoughts? (A rough sketch of the kind of interface I have in mind is
appended below the quoted patch.)

--CQ

>
> Signed-off-by: Matthew Auld <matthew.auld@xxxxxxxxx>
> Cc: CQ Tang <cq.tang@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/Makefile                 |   2 +
>  .../gpu/drm/i915/gem/i915_gem_client_blt.c    | 306 ++++++++++++++++++
>  .../gpu/drm/i915/gem/i915_gem_client_blt.h    |  21 ++
>  .../gpu/drm/i915/gem/i915_gem_object_blt.c    | 109 +++++++
>  .../gpu/drm/i915/gem/i915_gem_object_blt.h    |  24 ++
>  .../i915/gem/selftests/i915_gem_client_blt.c  | 127 ++++++++
>  .../i915/gem/selftests/i915_gem_object_blt.c  | 111 +++++++
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |   1 +
>  .../drm/i915/selftests/i915_live_selftests.h  |   2 +
>  9 files changed, 703 insertions(+)
>  create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
>  create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_client_blt.h
>  create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
>  create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
>  create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
>  create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 83588e9840f8..a7850bbffbe0 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -90,6 +90,7 @@ obj-y += gem/
>  gem-y += \
>         gem/i915_gem_busy.o \
>         gem/i915_gem_clflush.o \
> +       gem/i915_gem_client_blt.o \
>         gem/i915_gem_context.o \
>         gem/i915_gem_dmabuf.o \
>         gem/i915_gem_domain.o \
> @@ -97,6 +98,7 @@ gem-y += \
>         gem/i915_gem_fence.o \
>         gem/i915_gem_internal.o \
>         gem/i915_gem_object.o \
> +       gem/i915_gem_object_blt.o \
>         gem/i915_gem_mman.o \
>         gem/i915_gem_pages.o \
>         gem/i915_gem_phys.o \
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> new file mode 100644
> index 000000000000..2d8cf29a5796
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> @@ -0,0 +1,306 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +#include "i915_gem_client_blt.h"
> +
> +#include "i915_gem_object_blt.h"
> +#include "intel_drv.h"
> +
> +struct i915_sleeve {
> +        struct i915_vma *vma;
> +        struct drm_i915_gem_object *obj;
> +        struct sg_table *pages;
> +        struct i915_page_sizes page_sizes;
> +};
> +
> +static int vma_set_pages(struct i915_vma *vma)
> +{
> +        struct i915_sleeve *sleeve = vma->private;
> +
> +        vma->pages = sleeve->pages;
> +        vma->page_sizes = sleeve->page_sizes;
> +
> +        return 0;
> +}
> +
> +static void vma_clear_pages(struct i915_vma *vma)
> +{
> +        GEM_BUG_ON(!vma->pages);
> +        vma->pages = NULL;
> +}
> +
> +static int vma_bind(struct i915_vma *vma,
> +                    enum i915_cache_level cache_level,
> +                    u32 flags)
> +{
> +        return vma->vm->vma_ops.bind_vma(vma, cache_level, flags);
> +}
> +
> +static void vma_unbind(struct i915_vma *vma)
> +{
> +        vma->vm->vma_ops.unbind_vma(vma);
> +}
> +
> +static const struct i915_vma_ops proxy_vma_ops = {
> +        .set_pages = vma_set_pages,
> +        .clear_pages = vma_clear_pages,
> +        .bind_vma = vma_bind,
> +        .unbind_vma = vma_unbind,
> +};
> +
> +static struct i915_sleeve *create_sleeve(struct i915_address_space *vm,
> +                                         struct drm_i915_gem_object *obj,
> +                                         struct sg_table *pages,
> +                                         struct i915_page_sizes *page_sizes)
> +{
> +        struct i915_sleeve *sleeve;
> +        struct i915_vma *vma;
> +        int err;
> +
> +        sleeve = kzalloc(sizeof(*sleeve), GFP_KERNEL);
> +        if (!sleeve)
> +                return ERR_PTR(-ENOMEM);
> +
> +        vma = i915_vma_instance(obj, vm, NULL);
> +        if (IS_ERR(vma)) {
> +                err = PTR_ERR(vma);
> +                goto err_free;
> +        }
> +
> +        vma->private = sleeve;
> +        vma->ops = &proxy_vma_ops;
> +
> +        sleeve->vma = vma;
> +        sleeve->obj = i915_gem_object_get(obj);
> +        sleeve->pages = pages;
> +        sleeve->page_sizes = *page_sizes;
> +
> +        return sleeve;
> +
> +err_free:
> +        kfree(sleeve);
> +        return ERR_PTR(err);
> +}
> +
> +static void destroy_sleeve(struct i915_sleeve *sleeve)
> +{
> +        i915_gem_object_put(sleeve->obj);
> +        kfree(sleeve);
> +}
> +
> +struct clear_pages_work {
> +        struct dma_fence dma;
> +        struct dma_fence_cb cb;
> +        struct i915_sw_fence wait;
> +        struct work_struct work;
> +        struct irq_work irq_work;
> +        struct i915_sleeve *sleeve;
> +        struct intel_context *ce;
> +        u32 value;
> +};
> +
> +static const char *clear_pages_work_driver_name(struct dma_fence *fence)
> +{
> +        return DRIVER_NAME;
> +}
> +
> +static const char *clear_pages_work_timeline_name(struct dma_fence *fence)
> +{
> +        return "clear";
> +}
> +
> +static void clear_pages_work_release(struct dma_fence *fence)
> +{
> +        struct clear_pages_work *w = container_of(fence, typeof(*w), dma);
> +
> +        destroy_sleeve(w->sleeve);
> +
> +        i915_sw_fence_fini(&w->wait);
> +
> +        BUILD_BUG_ON(offsetof(typeof(*w), dma));
> +        dma_fence_free(&w->dma);
> +}
> +
> +static const struct dma_fence_ops clear_pages_work_ops = {
> +        .get_driver_name = clear_pages_work_driver_name,
> +        .get_timeline_name = clear_pages_work_timeline_name,
> +        .release = clear_pages_work_release,
> +};
> +
> +static void clear_pages_signal_irq_worker(struct irq_work *work)
> +{
> +        struct clear_pages_work *w = container_of(work, typeof(*w), irq_work);
> +
> +        dma_fence_signal(&w->dma);
> +        dma_fence_put(&w->dma);
> +}
> +
> +static void clear_pages_dma_fence_cb(struct dma_fence *fence,
> +                                     struct dma_fence_cb *cb)
> +{
> +        struct clear_pages_work *w = container_of(cb, typeof(*w), cb);
> +
> +        if (fence->error)
> +                dma_fence_set_error(&w->dma, fence->error);
> +
> +        /*
> +         * Push the signalling of the fence into yet another worker to avoid
> +         * the nightmare locking around the fence spinlock.
> +         */
> +        irq_work_queue(&w->irq_work);
> +}
> +
> +static void clear_pages_worker(struct work_struct *work)
> +{
> +        struct clear_pages_work *w = container_of(work, typeof(*w), work);
> +        struct drm_i915_private *i915 = w->ce->gem_context->i915;
> +        struct drm_i915_gem_object *obj = w->sleeve->obj;
> +        struct i915_vma *vma = w->sleeve->vma;
> +        struct i915_request *rq;
> +        int err = w->dma.error;
> +
> +        if (unlikely(err))
> +                goto out_signal;
> +
> +        if (obj->cache_dirty) {
> +                obj->write_domain = 0;
> +                if (i915_gem_object_has_struct_page(obj))
> +                        drm_clflush_sg(w->sleeve->pages);
> +                obj->cache_dirty = false;
> +        }
> +
> +        /* XXX: we need to kill this */
> +        mutex_lock(&i915->drm.struct_mutex);
> +        err = i915_vma_pin(vma, 0, 0, PIN_USER);
> +        if (unlikely(err))
> +                goto out_unlock;
> +
> +        rq = i915_request_create(w->ce);
> +        if (IS_ERR(rq)) {
> +                err = PTR_ERR(rq);
> +                goto out_unpin;
> +        }
> +
> +        /* There's no way the fence has signalled */
> +        if (dma_fence_add_callback(&rq->fence, &w->cb,
> +                                   clear_pages_dma_fence_cb))
> +                GEM_BUG_ON(1);
> +
> +        if (w->ce->engine->emit_init_breadcrumb) {
> +                err = w->ce->engine->emit_init_breadcrumb(rq);
> +                if (unlikely(err))
> +                        goto out_request;
> +        }
> +
> +        err = intel_emit_vma_fill_blt(rq, vma, w->value);
> +        if (unlikely(err))
> +                goto out_request;
> +
> +        /* XXX: more feverish nightmares await */
> +        i915_vma_lock(vma);
> +        err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +        i915_vma_unlock(vma);
> +out_request:
> +        if (unlikely(err)) {
> +                i915_request_skip(rq, err);
> +                err = 0;
> +        }
> +
> +        i915_request_add(rq);
> +out_unpin:
> +        i915_vma_unpin(vma);
> +out_unlock:
> +        mutex_unlock(&i915->drm.struct_mutex);
> +out_signal:
> +        if (unlikely(err)) {
> +                dma_fence_set_error(&w->dma, err);
> +                dma_fence_signal(&w->dma);
> +                dma_fence_put(&w->dma);
> +        }
> +}
> +
> +static int __i915_sw_fence_call
> +clear_pages_work_notify(struct i915_sw_fence *fence,
> +                        enum i915_sw_fence_notify state)
> +{
> +        struct clear_pages_work *w = container_of(fence, typeof(*w), wait);
> +
> +        switch (state) {
> +        case FENCE_COMPLETE:
> +                schedule_work(&w->work);
> +                break;
> +
> +        case FENCE_FREE:
> +                dma_fence_put(&w->dma);
> +                break;
> +        }
> +
> +        return NOTIFY_DONE;
> +}
> +
> +static DEFINE_SPINLOCK(fence_lock);
> +
> +/* XXX: better name please */
> +int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
> +                                     struct intel_context *ce,
> +                                     struct sg_table *pages,
> +                                     struct i915_page_sizes *page_sizes,
> +                                     u32 value)
> +{
> +        struct drm_i915_private *i915 = to_i915(obj->base.dev);
> +        struct i915_gem_context *ctx = ce->gem_context;
> +        struct i915_address_space *vm;
> +        struct clear_pages_work *work;
> +        struct i915_sleeve *sleeve;
> +        int err;
> +
> +        vm = ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
> +
> +        sleeve = create_sleeve(vm, obj, pages, page_sizes);
> +        if (IS_ERR(sleeve))
> +                return PTR_ERR(sleeve);
> +
> +        work = kmalloc(sizeof(*work), GFP_KERNEL);
> +        if (!work) {
> +                destroy_sleeve(sleeve);
> +                return -ENOMEM;
> +        }
> +
> +        work->value = value;
> +        work->sleeve = sleeve;
> +        work->ce = ce;
> +
> +        INIT_WORK(&work->work, clear_pages_worker);
> +
> +        init_irq_work(&work->irq_work, clear_pages_signal_irq_worker);
> +
> +        dma_fence_init(&work->dma,
> +                       &clear_pages_work_ops,
> +                       &fence_lock,
> +                       i915->mm.unordered_timeline,
> +                       0);
> +        i915_sw_fence_init(&work->wait, clear_pages_work_notify);
> +
> +        i915_gem_object_lock(obj);
> +        err = i915_sw_fence_await_reservation(&work->wait,
> +                                              obj->resv, NULL,
> +                                              true, I915_FENCE_TIMEOUT,
> +                                              I915_FENCE_GFP);
> +        if (err < 0) {
> +                dma_fence_set_error(&work->dma, err);
> +        } else {
> +                reservation_object_add_excl_fence(obj->resv, &work->dma);
> +                err = 0;
> +        }
> +        i915_gem_object_unlock(obj);
> +
> +        dma_fence_get(&work->dma);
> +        i915_sw_fence_commit(&work->wait);
> +
> +        return err;
> +}
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> +#include "selftests/i915_gem_client_blt.c"
> +#endif
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h
> new file mode 100644
> index 000000000000..3dbd28c22ff5
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h
> @@ -0,0 +1,21 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +#ifndef __I915_GEM_CLIENT_BLT_H__
> +#define __I915_GEM_CLIENT_BLT_H__
> +
> +#include <linux/types.h>
> +
> +struct drm_i915_gem_object;
> +struct i915_page_sizes;
> +struct intel_context;
> +struct sg_table;
> +
> +int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
> +                                     struct intel_context *ce,
> +                                     struct sg_table *pages,
> +                                     struct i915_page_sizes *page_sizes,
> +                                     u32 value);
> +
> +#endif
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> new file mode 100644
> index 000000000000..84324b755de6
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> @@ -0,0 +1,109 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "i915_gem_object_blt.h"
> +
> +#include "i915_gem_clflush.h"
> +#include "intel_drv.h"
> +
> +int intel_emit_vma_fill_blt(struct i915_request *rq,
> +                            struct i915_vma *vma,
> +                            u32 value)
> +{
> +        u32 *cs;
> +
> +        cs = intel_ring_begin(rq, 8);
> +        if (IS_ERR(cs))
> +                return PTR_ERR(cs);
> +
> +        if (INTEL_GEN(rq->i915) >= 8) {
> +                *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7-2);
> +                *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
> +                *cs++ = 0;
> +                *cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
> +                *cs++ = lower_32_bits(vma->node.start);
> +                *cs++ = upper_32_bits(vma->node.start);
> +                *cs++ = value;
> +                *cs++ = MI_NOOP;
> +        } else {
> +                *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6-2);
> +                *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
> +                *cs++ = 0;
> +                *cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
> +                *cs++ = vma->node.start;
> +                *cs++ = value;
> +                *cs++ = MI_NOOP;
> +                *cs++ = MI_NOOP;
> +        }
> +
> +        intel_ring_advance(rq, cs);
> +
> +        return 0;
> +}
> +
> +int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
> +                             struct intel_context *ce,
> +                             u32 value)
> +{
> +        struct drm_i915_private *i915 = to_i915(obj->base.dev);
> +        struct i915_gem_context *ctx = ce->gem_context;
> +        struct i915_address_space *vm;
> +        struct i915_request *rq;
> +        struct i915_vma *vma;
> +        int err;
> +
> +        /* XXX: ce->vm please */
> +        vm = ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
> +
> +        vma = i915_vma_instance(obj, vm, NULL);
> +        if (IS_ERR(vma))
> +                return PTR_ERR(vma);
> +
> +        err = i915_vma_pin(vma, 0, 0, PIN_USER);
> +        if (unlikely(err))
> +                return err;
> +
> +        if (obj->cache_dirty & ~obj->cache_coherent) {
> +                i915_gem_object_lock(obj);
> +                i915_gem_clflush_object(obj, 0);
> +                i915_gem_object_unlock(obj);
> +        }
> +
> +        rq = i915_request_create(ce);
> +        if (IS_ERR(rq)) {
> +                err = PTR_ERR(rq);
> +                goto out_unpin;
> +        }
> +
> +        err = i915_request_await_object(rq, obj, true);
> +        if (unlikely(err))
> +                goto out_request;
> +
> +        if (ce->engine->emit_init_breadcrumb) {
> +                err = ce->engine->emit_init_breadcrumb(rq);
> +                if (unlikely(err))
> +                        goto out_request;
> +        }
> +
> +        err = intel_emit_vma_fill_blt(rq, vma, value);
> +        if (unlikely(err))
> +                goto out_request;
> +
> +        i915_vma_lock(vma);
> +        err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +        i915_vma_unlock(vma);
> +out_request:
> +        if (unlikely(err))
> +                i915_request_skip(rq, err);
> +
> +        i915_request_add(rq);
> +out_unpin:
> +        i915_vma_unpin(vma);
> +        return err;
> +}
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> +#include "selftests/i915_gem_object_blt.c"
> +#endif
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
> new file mode 100644
> index 000000000000..7ec7de6ac0c0
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
> @@ -0,0 +1,24 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef __I915_GEM_OBJECT_BLT_H__
> +#define __I915_GEM_OBJECT_BLT_H__
> +
> +#include <linux/types.h>
> +
> +struct drm_i915_gem_object;
> +struct intel_context;
> +struct i915_request;
> +struct i915_vma;
> +
> +int intel_emit_vma_fill_blt(struct i915_request *rq,
> +                            struct i915_vma *vma,
> +                            u32 value);
> +
> +int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
> +                             struct intel_context *ce,
> +                             u32 value);
> +
> +#endif
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
> new file mode 100644
> index 000000000000..b650d8656d92
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
> @@ -0,0 +1,127 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "../i915_selftest.h"
> +
> +#include "selftests/igt_flush_test.h"
> +#include "selftests/mock_drm.h"
> +#include "mock_context.h"
> +
> +static int igt_client_fill(void *arg)
> +{
> +        struct intel_context *ce = arg;
> +        struct drm_i915_private *i915 = ce->gem_context->i915;
> +        struct drm_i915_gem_object *obj;
> +        struct rnd_state prng;
> +        IGT_TIMEOUT(end);
> +        u32 *vaddr;
> +        int err = 0;
> +
> +        prandom_seed_state(&prng, i915_selftest.random_seed);
> +
> +        do {
> +                u32 sz = prandom_u32_state(&prng) % SZ_32M;
> +                u32 val = prandom_u32_state(&prng);
> +                u32 i;
> +
> +                sz = round_up(sz, PAGE_SIZE);
> +
> +                pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val);
> +
> +                obj = i915_gem_object_create_internal(i915, sz);
> +                if (IS_ERR(obj)) {
> +                        err = PTR_ERR(obj);
> +                        goto err_flush;
> +                }
> +
> +                vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
> +                if (IS_ERR(vaddr)) {
> +                        err = PTR_ERR(vaddr);
> +                        goto err_put;
> +                }
> +
> +                /*
> +                 * XXX: The goal is move this to get_pages, so try to dirty the
> +                 * CPU cache first to check that we do the required clflush
> +                 * before scheduling the blt for !llc platforms. This matches
> +                 * some version of reality where at get_pages the pages
> +                 * themselves may not yet be coherent with the GPU(swap-in). If
> +                 * we are missing the flush then we should see the stale cache
> +                 * values after we do the set_to_cpu_domain and pick it up as a
> +                 * test failure.
> +                 */
> +                memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32));
> +
> +                if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
> +                        obj->cache_dirty = true;
> +
> +                err = i915_gem_schedule_fill_pages_blt(obj, ce, obj->mm.pages,
> +                                                       &obj->mm.page_sizes,
> +                                                       val);
> +                if (err)
> +                        goto err_unpin;
> +
> +                /*
> +                 * XXX: For now do the wait without the object resv lock to
> +                 * ensure we don't deadlock.
> +                 */
> +                err = i915_gem_object_wait(obj,
> +                                           I915_WAIT_INTERRUPTIBLE |
> +                                           I915_WAIT_ALL,
> +                                           MAX_SCHEDULE_TIMEOUT);
> +                if (err)
> +                        goto err_unpin;
> +
> +                i915_gem_object_lock(obj);
> +                err = i915_gem_object_set_to_cpu_domain(obj, false);
> +                i915_gem_object_unlock(obj);
> +                if (err)
> +                        goto err_unpin;
> +
> +                for (i = 0; i < obj->base.size / sizeof(u32); ++i) {
> +                        if (vaddr[i] != val) {
> +                                pr_err("vaddr[%u]=%x, expected=%x\n", i,
> +                                       vaddr[i], val);
> +                                err = -EINVAL;
> +                                goto err_unpin;
> +                        }
> +                }
> +
> +                i915_gem_object_unpin_map(obj);
> +                i915_gem_object_put(obj);
> +        } while (!time_after(jiffies, end));
> +
> +        goto err_flush;
> +
> +err_unpin:
> +        i915_gem_object_unpin_map(obj);
> +err_put:
> +        i915_gem_object_put(obj);
> +err_flush:
> +        mutex_lock(&i915->drm.struct_mutex);
> +        if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +                err = -EIO;
> +        mutex_unlock(&i915->drm.struct_mutex);
> +
> +        if (err == -ENOMEM)
> +                err = 0;
> +
> +        return err;
> +}
> +
> +int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
> +{
> +        static const struct i915_subtest tests[] = {
> +                SUBTEST(igt_client_fill),
> +        };
> +
> +        if (i915_terminally_wedged(i915))
> +                return 0;
> +
> +        if (!HAS_ENGINE(i915, BCS0))
> +                return 0;
> +
> +        return i915_subtests(tests, i915->engine[BCS0]->kernel_context);
> +}
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
> new file mode 100644
> index 000000000000..717521c8eb0a
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
> @@ -0,0 +1,111 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "../i915_selftest.h"
> +
> +#include "selftests/igt_flush_test.h"
> +#include "selftests/mock_drm.h"
> +#include "mock_context.h"
> +
> +static int igt_fill_blt(void *arg)
> +{
> +        struct intel_context *ce = arg;
> +        struct drm_i915_private *i915 = ce->gem_context->i915;
> +        struct drm_i915_gem_object *obj;
> +        struct rnd_state prng;
> +        IGT_TIMEOUT(end);
> +        u32 *vaddr;
> +        int err = 0;
> +
> +        prandom_seed_state(&prng, i915_selftest.random_seed);
> +
> +        do {
> +                u32 sz = prandom_u32_state(&prng) % SZ_32M;
> +                u32 val = prandom_u32_state(&prng);
> +                u32 i;
> +
> +                sz = round_up(sz, PAGE_SIZE);
> +
> +                pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val);
> +
> +                obj = i915_gem_object_create_internal(i915, sz);
> +                if (IS_ERR(obj)) {
> +                        err = PTR_ERR(vaddr);
> +                        goto err_flush;
> +                }
> +
> +                vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
> +                if (IS_ERR(vaddr)) {
> +                        err = PTR_ERR(vaddr);
> +                        goto err_put;
> +                }
> +
> +                /*
> +                 * Make sure the potentially async clflush does its job, if
> +                 * required.
> +                 */
> +                memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32));
> +
> +                if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
> +                        obj->cache_dirty = true;
> +
> +
> +                mutex_lock(&i915->drm.struct_mutex);
> +                err = i915_gem_object_fill_blt(obj, ce, val);
> +                mutex_unlock(&i915->drm.struct_mutex);
> +                if (err)
> +                        goto err_unpin;
> +
> +                i915_gem_object_lock(obj);
> +                err = i915_gem_object_set_to_cpu_domain(obj, false);
> +                i915_gem_object_unlock(obj);
> +                if (err)
> +                        goto err_unpin;
> +
> +                for (i = 0; i < obj->base.size / sizeof(u32); ++i) {
> +                        if (vaddr[i] != val) {
> +                                pr_err("vaddr[%u]=%x, expected=%x\n", i,
> +                                       vaddr[i], val);
> +                                err = -EINVAL;
> +                                goto err_unpin;
> +                        }
> +                }
> +
> +                i915_gem_object_unpin_map(obj);
> +                i915_gem_object_put(obj);
> +        } while (!time_after(jiffies, end));
> +
> +        goto err_flush;
> +
> +err_unpin:
> +        i915_gem_object_unpin_map(obj);
> +err_put:
> +        i915_gem_object_put(obj);
> +err_flush:
> +        mutex_lock(&i915->drm.struct_mutex);
> +        if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +                err = -EIO;
> +        mutex_unlock(&i915->drm.struct_mutex);
> +
> +        if (err == -ENOMEM)
> +                err = 0;
> +
> +        return err;
> +}
> +
> +int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
> +{
> +        static const struct i915_subtest tests[] = {
> +                SUBTEST(igt_fill_blt),
> +        };
> +
> +        if (i915_terminally_wedged(i915))
> +                return 0;
> +
> +        if (!HAS_ENGINE(i915, BCS0))
> +                return 0;
> +
> +        return i915_subtests(tests, i915->engine[BCS0]->kernel_context);
> +}
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index a34ece53a771..7e95827b0726 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -180,6 +180,7 @@
>  #define GFX_OP_DRAWRECT_INFO_I965  ((0x7900<<16)|0x2)
>
>  #define COLOR_BLT_CMD              (2<<29 | 0x40<<22 | (5-2))
> +#define XY_COLOR_BLT_CMD           (2<<29 | 0x50<<22)
>  #define SRC_COPY_BLT_CMD           ((2<<29)|(0x43<<22)|4)
>  #define XY_SRC_COPY_BLT_CMD        ((2<<29)|(0x53<<22)|6)
>  #define XY_MONO_SRC_COPY_IMM_BLT   ((2<<29)|(0x71<<22)|5)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> index 9bda36a598b3..d5dc4427d664 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> @@ -25,6 +25,8 @@ selftest(gem, i915_gem_live_selftests)
>  selftest(evict, i915_gem_evict_live_selftests)
>  selftest(hugepages, i915_gem_huge_page_live_selftests)
>  selftest(contexts, i915_gem_context_live_selftests)
> +selftest(blt, i915_gem_object_blt_live_selftests)
> +selftest(client, i915_gem_client_blt_live_selftests)
>  selftest(reset, intel_reset_live_selftests)
>  selftest(hangcheck, intel_hangcheck_live_selftests)
>  selftest(execlists, intel_execlists_live_selftests)
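
P.S. To make the suggestion above a bit more concrete, here is a rough,
purely illustrative sketch of the kind of generic interface I have in mind.
Everything in it is hypothetical: the names i915_gem_async_work,
i915_gem_async_work_ops and i915_gem_schedule_async_work do not exist in the
driver today; the fence/worker plumbing simply mirrors what clear_pages_work
already does in this patch.

/* SPDX-License-Identifier: MIT */
/*
 * Hypothetical sketch only -- not part of the patch above.
 */
#ifndef __I915_GEM_ASYNC_WORK_H__
#define __I915_GEM_ASYNC_WORK_H__

#include <linux/dma-fence.h>
#include <linux/irq_work.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#include "i915_sw_fence.h"

struct drm_i915_gem_object;
struct intel_context;
struct i915_gem_async_work;

struct i915_gem_async_work_ops {
        /* e.g. clear, swap in/out or migrate the object's backing pages */
        int (*execute)(struct i915_gem_async_work *work);
        void (*release)(struct i915_gem_async_work *work);
};

struct i915_gem_async_work {
        struct dma_fence dma;      /* published as the object's exclusive fence */
        struct i915_sw_fence wait; /* waits for prior users, then kicks work */
        struct work_struct work;   /* runs ops->execute() off the syscall path */
        struct irq_work irq_work;  /* signals dma from a safe context */

        const struct i915_gem_async_work_ops *ops;
        struct drm_i915_gem_object *obj;
        struct intel_context *ce;  /* only needed for GPU-assisted ops */
};

/*
 * Queue @ops against @obj. The embedded dma_fence would be installed in the
 * object's reservation object, so that get_pages()/CPU access can wait on
 * it, exactly as i915_gem_schedule_fill_pages_blt() does for the clear.
 */
int i915_gem_schedule_async_work(struct drm_i915_gem_object *obj,
                                 struct intel_context *ce,
                                 const struct i915_gem_async_work_ops *ops,
                                 struct i915_gem_async_work *work);

#endif /* __I915_GEM_ASYNC_WORK_H__ */

The blitter fill in this patch would then be just one ops implementation,
and swap-in/out, migration or an async get_pages() could reuse the same
scheduling and fence book-keeping.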