Ben Widawsky <ben@xxxxxxxxxxxx> writes: > On Tue, Apr 22, 2014 at 08:19:43PM +0300, Mika Kuoppala wrote: >> HW guys say that it is not a cool idea to let device >> go into rc6 without proper 3d pipeline state. >> >> For each new uninitialized context, generate a >> valid null render state to be run on context >> creation. >> >> This patch introduces a skeleton with emty states. >> >> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> >> --- >> drivers/gpu/drm/i915/Makefile | 1 + >> drivers/gpu/drm/i915/i915_drv.h | 2 + >> drivers/gpu/drm/i915/i915_gem_context.c | 7 + >> drivers/gpu/drm/i915/i915_gem_render_state.c | 222 +++++++++++++++++++++++++ >> drivers/gpu/drm/i915/intel_renderstate_gen6.h | 11 ++ >> drivers/gpu/drm/i915/intel_renderstate_gen7.h | 11 ++ >> drivers/gpu/drm/i915/intel_renderstate_gen8.h | 11 ++ >> 7 files changed, 265 insertions(+) >> create mode 100644 drivers/gpu/drm/i915/i915_gem_render_state.c >> create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen6.h >> create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen7.h >> create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen8.h >> >> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile >> index b1445b7..b5d4029 100644 >> --- a/drivers/gpu/drm/i915/Makefile >> +++ b/drivers/gpu/drm/i915/Makefile >> @@ -18,6 +18,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o >> # GEM code >> i915-y += i915_cmd_parser.o \ >> i915_gem_context.o \ >> + i915_gem_render_state.o \ >> i915_gem_debug.o \ >> i915_gem_dmabuf.o \ >> i915_gem_evict.o \ >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index 43b022c..f6ae2ee 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -2330,6 +2330,8 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, >> int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, >> struct drm_file *file); >> >> +/* i915_gem_render_state.c */ >> +int i915_gem_init_render_state(struct intel_ring_buffer *ring); >> /* i915_gem_evict.c */ >> int __must_check i915_gem_evict_something(struct drm_device *dev, >> struct i915_address_space *vm, >> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c >> index 30b355a..d648d4d 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_context.c >> +++ b/drivers/gpu/drm/i915/i915_gem_context.c >> @@ -746,6 +746,13 @@ static int do_switch(struct intel_ring_buffer *ring, >> /* obj is kept alive until the next request by its active ref */ >> i915_gem_object_ggtt_unpin(from->obj); >> i915_gem_context_unreference(from); >> + } else { >> + if (ring->id == RCS && to->is_initialized == false) { >> + >> + ret = i915_gem_init_render_state(ring); >> + if (ret) >> + DRM_ERROR("init render state: %d\n", ret); >> + } > > The way I thought we'd do this was to emit the state once for the > default context, and copy those pages to the BO of the new context. The state is big (on bdw), 18 pages. More on cover letter of v2 series: 1399382766-25116-1-git-send-email-mika.kuoppala@xxxxxxxxx >> } >> >> to->is_initialized = true; >> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c >> new file mode 100644 >> index 0000000..409f99b >> --- /dev/null >> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c >> @@ -0,0 +1,222 @@ >> +/* >> + * Copyright © 2014 Intel Corporation >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the "Software"), >> + * to deal in the Software without restriction, including without limitation >> + * the rights to use, copy, modify, merge, publish, distribute, sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice (including the next >> + * paragraph) shall be included in all copies or substantial portions of the >> + * Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING >> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS >> + * IN THE SOFTWARE. >> + * >> + * Authors: >> + * Mika Kuoppala <mika.kuoppala@xxxxxxxxx> >> + * >> + */ >> + >> +#include "i915_drv.h" >> + >> +#include "intel_renderstate_gen6.h" >> +#include "intel_renderstate_gen7.h" >> +#include "intel_renderstate_gen8.h" >> + >> +#define BATCH_MAX_SIZE 4096 >> + >> +struct i915_render_state { >> + struct drm_i915_gem_object *obj; >> + unsigned long ggtt_offset; >> + void *batch; >> + u32 size; >> + u32 len; >> +}; >> + >> +static struct i915_render_state * >> +render_state_alloc(struct drm_device *dev, unsigned int size) >> +{ >> + struct i915_render_state *so; >> + int ret; >> + >> + so = kzalloc(sizeof(*so), GFP_KERNEL); >> + if (!so) >> + return ERR_PTR(-ENOMEM); >> + >> + so->size = ALIGN(size, 4096); >> + >> + so->obj = i915_gem_alloc_object(dev, so->size); >> + if (so->obj == NULL) { >> + ret = -ENOMEM; >> + goto free; >> + } >> + >> + ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0); >> + if (ret) >> + goto free_gem; > > Using dispatch the way you do below, we'll actually be executing out of > the PPGTT, and not the global. While this makes no difference on the > current code, it does make a difference for some of my plans. I don't > see a clean way to fix this with the current code base, I am just > complaining. > >> + >> + so->batch = i915_gem_vmap_obj(so->obj); >> + if (!so->batch) { >> + ret = -ENOMEM; >> + goto unpin; >> + } >> + >> + so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj); >> + >> + return so; >> +unpin: >> + i915_gem_object_ggtt_unpin(so->obj); >> +free_gem: >> + drm_gem_object_unreference(&so->obj->base); >> +free: >> + kfree(so); >> + return ERR_PTR(ret); >> +} >> + >> +static void render_state_free(struct i915_render_state *so) >> +{ >> + vunmap(so->batch); >> + i915_gem_object_ggtt_unpin(so->obj); >> + drm_gem_object_unreference(&so->obj->base); >> + kfree(so); >> +} >> + >> +struct render_state_ro_data { >> + const u32 *batch; >> + unsigned int batch_items; >> + const u32 *reloc; >> + unsigned int reloc_items; >> + bool reloc_64bit; >> +}; >> + >> +static int render_state_copy(struct i915_render_state *so, >> + const struct render_state_ro_data *source) >> +{ >> + const u64 goffset = i915_gem_obj_ggtt_offset(so->obj); >> + const unsigned int num_relocs = source->reloc_items; >> + int reloc_index = 0; >> + u32 *d = (uint32_t *)so->batch; >> + unsigned int i = 0; >> + >> + if (source->batch_items * sizeof(*source->batch) > so->size) >> + return -EINVAL; >> + >> + while (i < source->batch_items) { >> + u32 s = source->batch[i]; >> + >> + if (reloc_index < num_relocs && >> + i * 4 == source->reloc[reloc_index]) { >> + >> + s += goffset & 0xffffffff; >> + >> + /* We keep batch offsets max 32bit */ >> + if (source->reloc_64bit) { >> + if (i + 1 >= source->batch_items || >> + source->batch[i + 1] != 0) >> + return -EINVAL; >> + >> + d[i] = s; >> + i++; >> + s = (goffset & 0xffffffff00000000ull) >> 32; >> + } >> + >> + reloc_index++; >> + } >> + >> + d[i] = s; >> + i++; >> + } >> + >> + if (num_relocs != reloc_index) { >> + DRM_ERROR("not all relocs resolved, %d out of %d\n", >> + reloc_index, num_relocs); >> + return -EINVAL; >> + } >> + >> + so->len = source->batch_items * sizeof(*source->batch); >> + >> + return 0; >> +} >> + >> +static int render_state_get(struct i915_render_state *so, int gen) >> +{ >> + struct render_state_ro_data ro_data; >> + >> +#define RS_SETUP(_d, _g, _r) { \ >> + if (!sizeof(gen ## _g ## _null_state_batch)) \ >> + return -EINVAL; \ >> + if (sizeof(gen ## _g ## _null_state_batch) & 0x3) \ >> + return -EINVAL; \ >> + if (sizeof(gen ## _g ## _null_state_relocs) & 0x3) \ >> + return -EINVAL; \ >> + _d.batch = gen ## _g ## _null_state_batch; \ >> + _d.batch_items = sizeof(gen ##_g ## _null_state_batch) / 4; \ >> + _d.reloc = gen ## _g ## _null_state_relocs; \ >> + _d.reloc_items = sizeof(gen ## _g ## _null_state_relocs) / 4; \ >> + _d.reloc_64bit = _r; \ >> + } >> + >> + switch (gen) { >> + case 6: >> + RS_SETUP(ro_data, 6, false); >> + break; >> + case 7: >> + RS_SETUP(ro_data, 7, false); >> + break; > > I'm a bit surprised we have no differences for HSW/BYT/IVB. The state generators need eyes on them for sure. There was some difference between ivb/hsw in rendercopy. But for the null state, I gathered it doesnt matter. >> + case 8: >> + RS_SETUP(ro_data, 8, true); >> + break; >> + default: >> + return -ENOENT; >> + } >> +#undef RS_SETUP >> + >> + return render_state_copy(so, &ro_data); >> +} >> + >> +int i915_gem_init_render_state(struct intel_ring_buffer *ring) >> +{ >> + const int gen = INTEL_INFO(ring->dev)->gen; >> + struct i915_render_state *so; >> + u32 seqno; >> + int ret; >> + >> + if (gen < 6) >> + return 0; > > HAS_HW_CONTEXTS > ickle: "Never forget ILK contexts" We have states only for >= snb. So thus the bailout. And do_switch is not called with fake contexts. >> + >> + so = render_state_alloc(ring->dev, BATCH_MAX_SIZE); >> + if (IS_ERR(so)) >> + return PTR_ERR(so); >> + >> + ret = render_state_get(so, gen); >> + if (ret) >> + goto out; >> + >> + ret = ring->dispatch_execbuffer(ring, >> + i915_gem_obj_ggtt_offset(so->obj), >> + so->len, >> + I915_DISPATCH_SECURE); >> + if (ret) >> + goto out; >> + >> + ret = intel_ring_flush_all_caches(ring); >> + if (ret) >> + goto out; >> + >> + ret = i915_add_request(ring, &seqno); >> + if (ret) >> + goto out; >> + >> + ret = i915_wait_seqno(ring, seqno); > > This wait is going to hurt. Not seeing why it's needed (plus I still > recommend my idea on the top). I couldn't come up with any explanations either. Removed in v2 Thanks, -Mika >> +out: >> + render_state_free(so); >> + return ret; >> +} >> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen6.h b/drivers/gpu/drm/i915/intel_renderstate_gen6.h >> new file mode 100644 >> index 0000000..4809f2f >> --- /dev/null >> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen6.h >> @@ -0,0 +1,11 @@ >> +#ifndef __INTEL_RENDERSTATE_GEN6 >> +#define __INTEL_RENDERSTATE_GEN6 >> + >> +static const uint32_t gen6_null_state_relocs[] = { >> +}; >> + >> +static const uint32_t gen6_null_state_batch[] = { >> + MI_BATCH_BUFFER_END, >> +}; >> + >> +#endif /* __INTEL_RENDERSTATE_GEN6 */ >> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen7.h b/drivers/gpu/drm/i915/intel_renderstate_gen7.h >> new file mode 100644 >> index 0000000..9b1420b >> --- /dev/null >> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen7.h >> @@ -0,0 +1,11 @@ >> +#ifndef __INTEL_RENDERSTATE_GEN7 >> +#define __INTEL_RENDERSTATE_GEN7 >> + >> +static const uint32_t gen7_null_state_relocs[] = { >> +}; >> + >> +static const uint32_t gen7_null_state_batch[] = { >> + MI_BATCH_BUFFER_END, >> +}; >> + >> +#endif /* __INTEL_RENDERSTATE_GEN7 */ >> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen8.h b/drivers/gpu/drm/i915/intel_renderstate_gen8.h >> new file mode 100644 >> index 0000000..d349dda >> --- /dev/null >> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen8.h >> @@ -0,0 +1,11 @@ >> +#ifndef __INTEL_RENDERSTATE_GEN8 >> +#define __INTEL_RENDERSTATE_GEN8 >> + >> +static const uint32_t gen8_null_state_relocs[] = { >> +}; >> + >> +static const uint32_t gen8_null_state_batch[] = { >> + MI_BATCH_BUFFER_END, >> +}; >> + >> +#endif /* __INTEL_RENDERSTATE_GEN8 */ >> -- >> 1.7.9.5 >> >> _______________________________________________ >> Intel-gfx mailing list >> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx > > -- > Ben Widawsky, Intel Open Source Technology Center _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx