On Sun, Mar 18, 2012 at 01:39:45PM -0700, Ben Widawsky wrote: > Implement the context switch code as well as the interfaces to do the > context switch. This patch also doesn't match 1:1 with the RFC patches. > The main difference is that from Daniel's responses the last context > object is now stored instead of the last context. This aids in allows us > to free the context data structure, and context object independently. > > There is room for optimization: this code will pin the context object > until the next context is active. The optimal way to do it is to > actually pin the object, move it to the active list, do the context > switch, and then unpin it. This allows the eviction code to actually > evict the context object if needed. > > The context switch code is missing workarounds, they will be implemented > in future patches. > > Signed-off-by: Ben Widawsky <ben at bwidawsk.net> Ok, I've looked at the use-sites of context_get and all this refcounting and noticed that: - we always hold dev->struct_mutex - we always drop the acquired reference to the context structure in the same function without dropping struct_mutex in between. So we don't seem to require any reference counting on these (and additional locking on the idr). Additionally the idr locking seems to give us a false sense of security because afaics the locking/refcounting would be broken when we would _not_ hold struct_mutex. So can we just rip this out or do we need this (in which case it needs some more work imo)? -Daniel > --- > drivers/gpu/drm/i915/i915_drv.h | 5 ++ > drivers/gpu/drm/i915/i915_gem_context.c | 118 ++++++++++++++++++++++++++++++- > drivers/gpu/drm/i915/intel_ringbuffer.c | 26 +++++++ > drivers/gpu/drm/i915/intel_ringbuffer.h | 5 ++ > 4 files changed, 153 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index f458a8f..c6c2ada 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1303,10 +1303,15 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, > enum i915_cache_level cache_level); > > /* i915_gem_context.c */ > +#define I915_CONTEXT_FORCED_SWITCH (1<<0) > +#define I915_CONTEXT_INITIAL_SWITCH (1<<1) > void i915_gem_context_load(struct drm_device *dev); > void i915_gem_context_unload(struct drm_device *dev); > void i915_gem_context_open(struct drm_device *dev, struct drm_file *file); > void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); > +int i915_switch_context(struct intel_ring_buffer *ring, > + struct drm_file *file, > + int to_id, u32 seqno, u32 flags); > > /* i915_gem_gtt.c */ > int __must_check i915_gem_init_aliasing_ppgtt(struct drm_device *dev); > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index 321bafd..cc508d5 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -262,7 +262,7 @@ void i915_gem_context_close(struct drm_device *dev, struct drm_file *file) > mutex_unlock(&dev->struct_mutex); > } > > -static __used struct i915_hw_context * > +static struct i915_hw_context * > i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id) > { > struct i915_hw_context *ctx = NULL; > @@ -279,3 +279,119 @@ i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id) > > return ctx; > } > + > +static int do_switch(struct drm_i915_gem_object *from_obj, > + struct i915_hw_context *to, > + u32 seqno, u32 flags) > +{ > + bool initial_switch = (flags & I915_CONTEXT_INITIAL_SWITCH) ? true : false; > + bool force = (flags & I915_CONTEXT_FORCED_SWITCH) ? true : false; > + struct intel_ring_buffer *ring = NULL; > + u32 hw_flags = 0; > + int ret; > + > + BUG_ON(to == NULL); > + BUG_ON(from_obj != NULL && from_obj->pin_count == 0); > + BUG_ON((from_obj != NULL && from_obj->context_id < 0) || to->obj->context_id < 0); > + > + ret = i915_gem_object_pin(to->obj, CONTEXT_ALIGN, false); > + if (ret) > + return ret; > + > + if (initial_switch) > + hw_flags |= MI_RESTORE_INHIBIT; > + if (force) > + hw_flags |= MI_FORCE_RESTORE; > + > + ring = to->ring; > + ret = intel_ring_mi_set_context(ring, to, hw_flags); > + if (ret) { > + i915_gem_object_unpin(to->obj); > + return ret; > + } > + > + /* The backing object for the context is done after switching to the > + * *next* context. Therefore we cannot retire the previous context until > + * the next context has already started running. In fact, the below code > + * is a bit suboptimal because the retiring can occur simply after the > + * MI_SET_CONTEXT instead of when the next seqno has completed. > + */ > + if (from_obj != NULL) { > + i915_gem_object_move_to_active(from_obj, ring, seqno); > + /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the > + * whole damn pipeline, we don't need to explicitly mark the > + * object dirty. It should be safe to evict the object at any > + * point after MI_SET_CONTEXT has finished executing... true as > + * of GEN7. If not from_obj->dirty=1 would make this safer. > + */ > + BUG_ON(from_obj->ring != to->ring); > + } > + > + if (from_obj) > + i915_gem_object_unpin(from_obj); > + > + ring->last_context_obj = to->obj; > + > + return 0; > +} > + > +/** > + * i915_switch_context() - perform a GPU context switch. > + * @ring: ring for which we'll execute the context switch > + * @file_priv: file_priv associated with the context, may be NULL > + * @id: context id number > + * @seqno: sequence number by which the new context will be switched to > + * @flags: > + * > + * This function will perform the context switch to the given context id on the > + * specified ring. Unfortunately the context switch code doesn't have an > + * independent way of knowing when the context switch has occurred, so the > + * caller must notify of us a time by which the context switch has occurred. > + */ > +int i915_switch_context(struct intel_ring_buffer *ring, > + struct drm_file *file, > + int to_id, u32 seqno, u32 flags) > +{ > + struct drm_i915_private *dev_priv = ring->dev->dev_private; > + struct drm_i915_file_private *file_priv = NULL; > + struct i915_hw_context *to; > + struct drm_i915_gem_object *from_obj = ring->last_context_obj; > + int ret; > + > + if (dev_priv->hw_contexts_disabled) > + return 0; > + > + if (ring != &dev_priv->ring[RCS]) > + return 0; > + > + if (file) > + file_priv = file->driver_priv; > + > + if (to_id == DEFAULT_CONTEXT_ID) { > + to = ring->default_context; > + kref_get(&to->nref); > + } else { > + to = i915_gem_context_get(file_priv, to_id); > + if (to == NULL) > + return -EINVAL; > + } > + drm_gem_object_reference(&to->obj->base); > + > + /* If the object is still on the active list, we can shortcut */ > + if (from_obj == to->obj) { > + ret = 0; > + goto out; > + } > + > + ret = do_switch(from_obj, to, seqno, flags); > + if (ret) { > + drm_gem_object_unreference(&to->obj->base); > + goto out; > + } > + > +out: > + if (from_obj != NULL) > + drm_gem_object_unreference(&from_obj->base); > + kref_put(&to->nref, destroy_hw_context); > + return ret; > +} > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index ca3972f..cd74f86 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -920,6 +920,32 @@ render_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, > return 0; > } > > +int intel_ring_mi_set_context(struct intel_ring_buffer *ring, > + struct i915_hw_context *new_context, > + u32 hw_flags) > +{ > + int ret; > + > + ret = intel_ring_begin(ring, 4); > + if (ret) > + return ret; > + > + intel_ring_emit(ring, MI_NOOP); > + intel_ring_emit(ring, MI_SET_CONTEXT); > + intel_ring_emit(ring, new_context->obj->gtt_offset | > + MI_MM_SPACE_GTT | > + MI_SAVE_EXT_STATE_EN | > + MI_RESTORE_EXT_STATE_EN | > + hw_flags); > + /* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP */ > + intel_ring_emit(ring, MI_NOOP); > + > + intel_ring_advance(ring); > + > + return ret; > + > +} > + > static void cleanup_status_page(struct intel_ring_buffer *ring) > { > drm_i915_private_t *dev_priv = ring->dev->dev_private; > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 8c9f898..0ed98bb 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -121,6 +121,7 @@ struct intel_ring_buffer { > drm_local_map_t map; > > struct i915_hw_context *default_context; > + struct drm_i915_gem_object *last_context_obj; > > void *private; > }; > @@ -216,6 +217,10 @@ static inline void i915_trace_irq_get(struct intel_ring_buffer *ring, u32 seqno) > ring->trace_irq_seqno = seqno; > } > > +int intel_ring_mi_set_context(struct intel_ring_buffer *ring, > + struct i915_hw_context *new_context, > + u32 hw_flags); > + > /* DRI warts */ > int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size); > > -- > 1.7.9.4 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx at lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Mail: daniel at ffwll.ch Mobile: +41 (0)79 365 57 48