On Tue, Apr 22, 2014 at 08:19:43PM +0300, Mika Kuoppala wrote: > HW guys say that it is not a cool idea to let device > go into rc6 without proper 3d pipeline state. > > For each new uninitialized context, generate a > valid null render state to be run on context > creation. > > This patch introduces a skeleton with emty states. > > Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> > --- > drivers/gpu/drm/i915/Makefile | 1 + > drivers/gpu/drm/i915/i915_drv.h | 2 + > drivers/gpu/drm/i915/i915_gem_context.c | 7 + > drivers/gpu/drm/i915/i915_gem_render_state.c | 222 +++++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_renderstate_gen6.h | 11 ++ > drivers/gpu/drm/i915/intel_renderstate_gen7.h | 11 ++ > drivers/gpu/drm/i915/intel_renderstate_gen8.h | 11 ++ > 7 files changed, 265 insertions(+) > create mode 100644 drivers/gpu/drm/i915/i915_gem_render_state.c > create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen6.h > create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen7.h > create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen8.h > > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile > index b1445b7..b5d4029 100644 > --- a/drivers/gpu/drm/i915/Makefile > +++ b/drivers/gpu/drm/i915/Makefile > @@ -18,6 +18,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o > # GEM code > i915-y += i915_cmd_parser.o \ > i915_gem_context.o \ > + i915_gem_render_state.o \ > i915_gem_debug.o \ > i915_gem_dmabuf.o \ > i915_gem_evict.o \ > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 43b022c..f6ae2ee 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2330,6 +2330,8 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, > int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, > struct drm_file *file); > > +/* i915_gem_render_state.c */ > +int i915_gem_init_render_state(struct intel_ring_buffer *ring); > /* i915_gem_evict.c */ > int __must_check i915_gem_evict_something(struct drm_device *dev, > struct i915_address_space *vm, > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index 30b355a..d648d4d 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -746,6 +746,13 @@ static int do_switch(struct intel_ring_buffer *ring, > /* obj is kept alive until the next request by its active ref */ > i915_gem_object_ggtt_unpin(from->obj); > i915_gem_context_unreference(from); > + } else { > + if (ring->id == RCS && to->is_initialized == false) { > + > + ret = i915_gem_init_render_state(ring); > + if (ret) > + DRM_ERROR("init render state: %d\n", ret); > + } The way I thought we'd do this was to emit the state once for the default context, and copy those pages to the BO of the new context. > } > > to->is_initialized = true; > diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c > new file mode 100644 > index 0000000..409f99b > --- /dev/null > +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c > @@ -0,0 +1,222 @@ > +/* > + * Copyright © 2014 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. > + * > + * Authors: > + * Mika Kuoppala <mika.kuoppala@xxxxxxxxx> > + * > + */ > + > +#include "i915_drv.h" > + > +#include "intel_renderstate_gen6.h" > +#include "intel_renderstate_gen7.h" > +#include "intel_renderstate_gen8.h" > + > +#define BATCH_MAX_SIZE 4096 > + > +struct i915_render_state { > + struct drm_i915_gem_object *obj; > + unsigned long ggtt_offset; > + void *batch; > + u32 size; > + u32 len; > +}; > + > +static struct i915_render_state * > +render_state_alloc(struct drm_device *dev, unsigned int size) > +{ > + struct i915_render_state *so; > + int ret; > + > + so = kzalloc(sizeof(*so), GFP_KERNEL); > + if (!so) > + return ERR_PTR(-ENOMEM); > + > + so->size = ALIGN(size, 4096); > + > + so->obj = i915_gem_alloc_object(dev, so->size); > + if (so->obj == NULL) { > + ret = -ENOMEM; > + goto free; > + } > + > + ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0); > + if (ret) > + goto free_gem; Using dispatch the way you do below, we'll actually be executing out of the PPGTT, and not the global. While this makes no difference on the current code, it does make a difference for some of my plans. I don't see a clean way to fix this with the current code base, I am just complaining. > + > + so->batch = i915_gem_vmap_obj(so->obj); > + if (!so->batch) { > + ret = -ENOMEM; > + goto unpin; > + } > + > + so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj); > + > + return so; > +unpin: > + i915_gem_object_ggtt_unpin(so->obj); > +free_gem: > + drm_gem_object_unreference(&so->obj->base); > +free: > + kfree(so); > + return ERR_PTR(ret); > +} > + > +static void render_state_free(struct i915_render_state *so) > +{ > + vunmap(so->batch); > + i915_gem_object_ggtt_unpin(so->obj); > + drm_gem_object_unreference(&so->obj->base); > + kfree(so); > +} > + > +struct render_state_ro_data { > + const u32 *batch; > + unsigned int batch_items; > + const u32 *reloc; > + unsigned int reloc_items; > + bool reloc_64bit; > +}; > + > +static int render_state_copy(struct i915_render_state *so, > + const struct render_state_ro_data *source) > +{ > + const u64 goffset = i915_gem_obj_ggtt_offset(so->obj); > + const unsigned int num_relocs = source->reloc_items; > + int reloc_index = 0; > + u32 *d = (uint32_t *)so->batch; > + unsigned int i = 0; > + > + if (source->batch_items * sizeof(*source->batch) > so->size) > + return -EINVAL; > + > + while (i < source->batch_items) { > + u32 s = source->batch[i]; > + > + if (reloc_index < num_relocs && > + i * 4 == source->reloc[reloc_index]) { > + > + s += goffset & 0xffffffff; > + > + /* We keep batch offsets max 32bit */ > + if (source->reloc_64bit) { > + if (i + 1 >= source->batch_items || > + source->batch[i + 1] != 0) > + return -EINVAL; > + > + d[i] = s; > + i++; > + s = (goffset & 0xffffffff00000000ull) >> 32; > + } > + > + reloc_index++; > + } > + > + d[i] = s; > + i++; > + } > + > + if (num_relocs != reloc_index) { > + DRM_ERROR("not all relocs resolved, %d out of %d\n", > + reloc_index, num_relocs); > + return -EINVAL; > + } > + > + so->len = source->batch_items * sizeof(*source->batch); > + > + return 0; > +} > + > +static int render_state_get(struct i915_render_state *so, int gen) > +{ > + struct render_state_ro_data ro_data; > + > +#define RS_SETUP(_d, _g, _r) { \ > + if (!sizeof(gen ## _g ## _null_state_batch)) \ > + return -EINVAL; \ > + if (sizeof(gen ## _g ## _null_state_batch) & 0x3) \ > + return -EINVAL; \ > + if (sizeof(gen ## _g ## _null_state_relocs) & 0x3) \ > + return -EINVAL; \ > + _d.batch = gen ## _g ## _null_state_batch; \ > + _d.batch_items = sizeof(gen ##_g ## _null_state_batch) / 4; \ > + _d.reloc = gen ## _g ## _null_state_relocs; \ > + _d.reloc_items = sizeof(gen ## _g ## _null_state_relocs) / 4; \ > + _d.reloc_64bit = _r; \ > + } > + > + switch (gen) { > + case 6: > + RS_SETUP(ro_data, 6, false); > + break; > + case 7: > + RS_SETUP(ro_data, 7, false); > + break; I'm a bit surprised we have no differences for HSW/BYT/IVB. > + case 8: > + RS_SETUP(ro_data, 8, true); > + break; > + default: > + return -ENOENT; > + } > +#undef RS_SETUP > + > + return render_state_copy(so, &ro_data); > +} > + > +int i915_gem_init_render_state(struct intel_ring_buffer *ring) > +{ > + const int gen = INTEL_INFO(ring->dev)->gen; > + struct i915_render_state *so; > + u32 seqno; > + int ret; > + > + if (gen < 6) > + return 0; HAS_HW_CONTEXTS ickle: "Never forget ILK contexts" > + > + so = render_state_alloc(ring->dev, BATCH_MAX_SIZE); > + if (IS_ERR(so)) > + return PTR_ERR(so); > + > + ret = render_state_get(so, gen); > + if (ret) > + goto out; > + > + ret = ring->dispatch_execbuffer(ring, > + i915_gem_obj_ggtt_offset(so->obj), > + so->len, > + I915_DISPATCH_SECURE); > + if (ret) > + goto out; > + > + ret = intel_ring_flush_all_caches(ring); > + if (ret) > + goto out; > + > + ret = i915_add_request(ring, &seqno); > + if (ret) > + goto out; > + > + ret = i915_wait_seqno(ring, seqno); This wait is going to hurt. Not seeing why it's needed (plus I still recommend my idea on the top). > +out: > + render_state_free(so); > + return ret; > +} > diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen6.h b/drivers/gpu/drm/i915/intel_renderstate_gen6.h > new file mode 100644 > index 0000000..4809f2f > --- /dev/null > +++ b/drivers/gpu/drm/i915/intel_renderstate_gen6.h > @@ -0,0 +1,11 @@ > +#ifndef __INTEL_RENDERSTATE_GEN6 > +#define __INTEL_RENDERSTATE_GEN6 > + > +static const uint32_t gen6_null_state_relocs[] = { > +}; > + > +static const uint32_t gen6_null_state_batch[] = { > + MI_BATCH_BUFFER_END, > +}; > + > +#endif /* __INTEL_RENDERSTATE_GEN6 */ > diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen7.h b/drivers/gpu/drm/i915/intel_renderstate_gen7.h > new file mode 100644 > index 0000000..9b1420b > --- /dev/null > +++ b/drivers/gpu/drm/i915/intel_renderstate_gen7.h > @@ -0,0 +1,11 @@ > +#ifndef __INTEL_RENDERSTATE_GEN7 > +#define __INTEL_RENDERSTATE_GEN7 > + > +static const uint32_t gen7_null_state_relocs[] = { > +}; > + > +static const uint32_t gen7_null_state_batch[] = { > + MI_BATCH_BUFFER_END, > +}; > + > +#endif /* __INTEL_RENDERSTATE_GEN7 */ > diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen8.h b/drivers/gpu/drm/i915/intel_renderstate_gen8.h > new file mode 100644 > index 0000000..d349dda > --- /dev/null > +++ b/drivers/gpu/drm/i915/intel_renderstate_gen8.h > @@ -0,0 +1,11 @@ > +#ifndef __INTEL_RENDERSTATE_GEN8 > +#define __INTEL_RENDERSTATE_GEN8 > + > +static const uint32_t gen8_null_state_relocs[] = { > +}; > + > +static const uint32_t gen8_null_state_batch[] = { > + MI_BATCH_BUFFER_END, > +}; > + > +#endif /* __INTEL_RENDERSTATE_GEN8 */ > -- > 1.7.9.5 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Ben Widawsky, Intel Open Source Technology Center _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx