Re: [PATCH 2/3] drm/i915: add render state initialization

Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> · Tue, 06 May 2014 16:33:23 +0300

Ben Widawsky <ben@xxxxxxxxxxxx> writes:

> On Tue, Apr 22, 2014 at 08:19:43PM +0300, Mika Kuoppala wrote:
>> HW guys say that it is not a cool idea to let device
>> go into rc6 without proper 3d pipeline state.
>> 
>> For each new uninitialized context, generate a
>> valid null render state to be run on context
>> creation.
>> 
>> This patch introduces a skeleton with emty states.
>> 
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
>> ---
>>  drivers/gpu/drm/i915/Makefile                 |    1 +
>>  drivers/gpu/drm/i915/i915_drv.h               |    2 +
>>  drivers/gpu/drm/i915/i915_gem_context.c       |    7 +
>>  drivers/gpu/drm/i915/i915_gem_render_state.c  |  222 +++++++++++++++++++++++++
>>  drivers/gpu/drm/i915/intel_renderstate_gen6.h |   11 ++
>>  drivers/gpu/drm/i915/intel_renderstate_gen7.h |   11 ++
>>  drivers/gpu/drm/i915/intel_renderstate_gen8.h |   11 ++
>>  7 files changed, 265 insertions(+)
>>  create mode 100644 drivers/gpu/drm/i915/i915_gem_render_state.c
>>  create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen6.h
>>  create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen7.h
>>  create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen8.h
>> 
>> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
>> index b1445b7..b5d4029 100644
>> --- a/drivers/gpu/drm/i915/Makefile
>> +++ b/drivers/gpu/drm/i915/Makefile
>> @@ -18,6 +18,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
>>  # GEM code
>>  i915-y += i915_cmd_parser.o \
>>  	  i915_gem_context.o \
>> +	  i915_gem_render_state.o \
>>  	  i915_gem_debug.o \
>>  	  i915_gem_dmabuf.o \
>>  	  i915_gem_evict.o \
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 43b022c..f6ae2ee 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2330,6 +2330,8 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>>  int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>>  				   struct drm_file *file);
>>  
>> +/* i915_gem_render_state.c */
>> +int i915_gem_init_render_state(struct intel_ring_buffer *ring);
>>  /* i915_gem_evict.c */
>>  int __must_check i915_gem_evict_something(struct drm_device *dev,
>>  					  struct i915_address_space *vm,
>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>> index 30b355a..d648d4d 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -746,6 +746,13 @@ static int do_switch(struct intel_ring_buffer *ring,
>>  		/* obj is kept alive until the next request by its active ref */
>>  		i915_gem_object_ggtt_unpin(from->obj);
>>  		i915_gem_context_unreference(from);
>> +	} else {
>> +		if (ring->id == RCS && to->is_initialized == false) {
>> +
>> +			ret = i915_gem_init_render_state(ring);
>> +			if (ret)
>> +				DRM_ERROR("init render state: %d\n", ret);
>> +		}
>
> The way I thought we'd do this was to emit the state once for the
> default context, and copy those pages to the BO of the new context.

The state is big (on bdw), 18 pages. More on cover letter of v2 series:
1399382766-25116-1-git-send-email-mika.kuoppala@xxxxxxxxx

>>  	}
>>  
>>  	to->is_initialized = true;
>> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
>> new file mode 100644
>> index 0000000..409f99b
>> --- /dev/null
>> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
>> @@ -0,0 +1,222 @@
>> +/*
>> + * Copyright © 2014 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + * Authors:
>> + *    Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
>> + *
>> + */
>> +
>> +#include "i915_drv.h"
>> +
>> +#include "intel_renderstate_gen6.h"
>> +#include "intel_renderstate_gen7.h"
>> +#include "intel_renderstate_gen8.h"
>> +
>> +#define BATCH_MAX_SIZE 4096
>> +
>> +struct i915_render_state {
>> +	struct drm_i915_gem_object *obj;
>> +	unsigned long ggtt_offset;
>> +	void *batch;
>> +	u32 size;
>> +	u32 len;
>> +};
>> +
>> +static struct i915_render_state *
>> +render_state_alloc(struct drm_device *dev, unsigned int size)
>> +{
>> +	struct i915_render_state *so;
>> +	int ret;
>> +
>> +	so = kzalloc(sizeof(*so), GFP_KERNEL);
>> +	if (!so)
>> +		return ERR_PTR(-ENOMEM);
>> +
>> +	so->size = ALIGN(size, 4096);
>> +
>> +	so->obj = i915_gem_alloc_object(dev, so->size);
>> +	if (so->obj == NULL) {
>> +		ret = -ENOMEM;
>> +		goto free;
>> +	}
>> +
>> +	ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0);
>> +	if (ret)
>> +		goto free_gem;
>
> Using dispatch the way you do below, we'll actually be executing out of
> the PPGTT, and not the global. While this makes no difference on the
> current code, it does make a difference for some of my plans. I don't
> see a clean way to fix this with the current code base, I am just
> complaining.
>
>> +
>> +	so->batch = i915_gem_vmap_obj(so->obj);
>> +	if (!so->batch) {
>> +		ret = -ENOMEM;
>> +		goto unpin;
>> +	}
>> +
>> +	so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj);
>> +
>> +	return so;
>> +unpin:
>> +	i915_gem_object_ggtt_unpin(so->obj);
>> +free_gem:
>> +	drm_gem_object_unreference(&so->obj->base);
>> +free:
>> +	kfree(so);
>> +	return ERR_PTR(ret);
>> +}
>> +
>> +static void render_state_free(struct i915_render_state *so)
>> +{
>> +	vunmap(so->batch);
>> +	i915_gem_object_ggtt_unpin(so->obj);
>> +	drm_gem_object_unreference(&so->obj->base);
>> +	kfree(so);
>> +}
>> +
>> +struct render_state_ro_data {
>> +	const u32 *batch;
>> +	unsigned int batch_items;
>> +	const u32 *reloc;
>> +	unsigned int reloc_items;
>> +	bool reloc_64bit;
>> +};
>> +
>> +static int render_state_copy(struct i915_render_state *so,
>> +			     const struct render_state_ro_data *source)
>> +{
>> +	const u64 goffset = i915_gem_obj_ggtt_offset(so->obj);
>> +	const unsigned int num_relocs = source->reloc_items;
>> +	int reloc_index = 0;
>> +	u32 *d = (uint32_t *)so->batch;
>> +	unsigned int i = 0;
>> +
>> +	if (source->batch_items * sizeof(*source->batch) > so->size)
>> +		return -EINVAL;
>> +
>> +	while (i < source->batch_items) {
>> +		u32 s = source->batch[i];
>> +
>> +		if (reloc_index < num_relocs &&
>> +		    i * 4  == source->reloc[reloc_index]) {
>> +
>> +			s += goffset & 0xffffffff;
>> +
>> +			/* We keep batch offsets max 32bit */
>> +			if (source->reloc_64bit) {
>> +				if (i + 1 >= source->batch_items ||
>> +				    source->batch[i + 1] != 0)
>> +					return -EINVAL;
>> +
>> +				d[i] = s;
>> +				i++;
>> +				s = (goffset & 0xffffffff00000000ull) >> 32;
>> +			}
>> +
>> +			reloc_index++;
>> +		}
>> +
>> +		d[i] = s;
>> +		i++;
>> +	}
>> +
>> +	if (num_relocs != reloc_index) {
>> +		DRM_ERROR("not all relocs resolved, %d out of %d\n",
>> +			  reloc_index, num_relocs);
>> +		return -EINVAL;
>> +	}
>> +
>> +	so->len = source->batch_items * sizeof(*source->batch);
>> +
>> +	return 0;
>> +}
>> +
>> +static int render_state_get(struct i915_render_state *so, int gen)
>> +{
>> +	struct render_state_ro_data ro_data;
>> +
>> +#define RS_SETUP(_d, _g, _r) {						\
>> +		if (!sizeof(gen ## _g ## _null_state_batch))		\
>> +			return -EINVAL;					\
>> +		if (sizeof(gen ## _g ## _null_state_batch) & 0x3)	\
>> +			return -EINVAL;					\
>> +		if (sizeof(gen ## _g ## _null_state_relocs) & 0x3)	\
>> +			return -EINVAL;					\
>> +		_d.batch = gen ## _g ## _null_state_batch;		\
>> +		_d.batch_items = sizeof(gen ##_g ## _null_state_batch) / 4; \
>> +		_d.reloc = gen ## _g ## _null_state_relocs;		\
>> +		_d.reloc_items = sizeof(gen ## _g ## _null_state_relocs) / 4; \
>> +		_d.reloc_64bit = _r;					\
>> +	}
>> +
>> +	switch (gen) {
>> +	case 6:
>> +		RS_SETUP(ro_data, 6, false);
>> +		break;
>> +	case 7:
>> +		RS_SETUP(ro_data, 7, false);
>> +		break;
>
> I'm a bit surprised we have no differences for HSW/BYT/IVB.

The state generators need eyes on them for sure. There was some difference
between ivb/hsw in rendercopy. But for the null state, I gathered it
doesnt matter.

>> +	case 8:
>> +		RS_SETUP(ro_data, 8, true);
>> +		break;
>> +	default:
>> +		return -ENOENT;
>> +	}
>> +#undef RS_SETUP
>> +
>> +	return render_state_copy(so, &ro_data);
>> +}
>> +
>> +int i915_gem_init_render_state(struct intel_ring_buffer *ring)
>> +{
>> +	const int gen = INTEL_INFO(ring->dev)->gen;
>> +	struct i915_render_state *so;
>> +	u32 seqno;
>> +	int ret;
>> +
>> +	if (gen < 6)
>> +		return 0;
>
> HAS_HW_CONTEXTS
> ickle: "Never forget ILK contexts"

We have states only for >= snb. So thus the bailout.
And do_switch is not called with fake contexts.

>> +
>> +	so = render_state_alloc(ring->dev, BATCH_MAX_SIZE);
>> +	if (IS_ERR(so))
>> +		return PTR_ERR(so);
>> +
>> +	ret = render_state_get(so, gen);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = ring->dispatch_execbuffer(ring,
>> +					i915_gem_obj_ggtt_offset(so->obj),
>> +					so->len,
>> +					I915_DISPATCH_SECURE);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = intel_ring_flush_all_caches(ring);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = i915_add_request(ring, &seqno);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = i915_wait_seqno(ring, seqno);
>
> This wait is going to hurt. Not seeing why it's needed (plus I still
> recommend my idea on the top).

I couldn't come up with any explanations either. Removed in v2

Thanks,
-Mika

>> +out:
>> +	render_state_free(so);
>> +	return ret;
>> +}
>> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen6.h b/drivers/gpu/drm/i915/intel_renderstate_gen6.h
>> new file mode 100644
>> index 0000000..4809f2f
>> --- /dev/null
>> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen6.h
>> @@ -0,0 +1,11 @@
>> +#ifndef __INTEL_RENDERSTATE_GEN6
>> +#define __INTEL_RENDERSTATE_GEN6
>> +
>> +static const uint32_t gen6_null_state_relocs[] = {
>> +};
>> +
>> +static const uint32_t gen6_null_state_batch[] = {
>> +	MI_BATCH_BUFFER_END,
>> +};
>> +
>> +#endif /* __INTEL_RENDERSTATE_GEN6 */
>> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen7.h b/drivers/gpu/drm/i915/intel_renderstate_gen7.h
>> new file mode 100644
>> index 0000000..9b1420b
>> --- /dev/null
>> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen7.h
>> @@ -0,0 +1,11 @@
>> +#ifndef __INTEL_RENDERSTATE_GEN7
>> +#define __INTEL_RENDERSTATE_GEN7
>> +
>> +static const uint32_t gen7_null_state_relocs[] = {
>> +};
>> +
>> +static const uint32_t gen7_null_state_batch[] = {
>> +	MI_BATCH_BUFFER_END,
>> +};
>> +
>> +#endif /* __INTEL_RENDERSTATE_GEN7 */
>> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen8.h b/drivers/gpu/drm/i915/intel_renderstate_gen8.h
>> new file mode 100644
>> index 0000000..d349dda
>> --- /dev/null
>> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen8.h
>> @@ -0,0 +1,11 @@
>> +#ifndef __INTEL_RENDERSTATE_GEN8
>> +#define __INTEL_RENDERSTATE_GEN8
>> +
>> +static const uint32_t gen8_null_state_relocs[] = {
>> +};
>> +
>> +static const uint32_t gen8_null_state_batch[] = {
>> +	MI_BATCH_BUFFER_END,
>> +};
>> +
>> +#endif /* __INTEL_RENDERSTATE_GEN8 */
>> -- 
>> 1.7.9.5
>> 
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> -- 
> Ben Widawsky, Intel Open Source Technology Center
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx