On Wed, Jul 01, 2015 at 10:27:21AM +0100, Chris Wilson wrote: > Dave Gordon made the good suggestion that once the ringbuffers were > setup, the actual queuing of commands to program the initial GPU state > could be deferred. Since that initial state contains instructions for > setting up the first power context, we want to execute that as earlier > as possible, preferrably in the background to userspace. Then when > userspace does wake up, the first time it opens the device we just need > to flush the work to be sure that our commands are queued before any of > userspace's. (Hooking into the device open should mean we have to check > less often than say hooking into execbuffer.) > > Suggested-by: Dave Gordon <david.s.gordon@xxxxxxxxx> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Dave Gordon <david.s.gordon@xxxxxxxxx> Just before this gets a bit out of hand with various patches floating around ... I really meant it when I said that we should have a proper design discussion about this in Jesse's meeting first. Looking at all the ideas between you, Dave & me I count about 3-4 approaches to async gem init, and all have upsides and downsides. Aside from that I concur that if we do async gem init then it better be a worker and not relying on some abitrary userspace ioctl/syscall. Of course we'd still need to place proper synchronization points at a good place (flush_work in gem_open for Dave's design), but that's really orthogonal to running it in a worker imo. -Daniel > --- > drivers/gpu/drm/i915/i915_drv.h | 2 + > drivers/gpu/drm/i915/i915_gem.c | 113 +++++++++++++++++++++++++++------------- > 2 files changed, 79 insertions(+), 36 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 3ea1fe8db63e..d4003dea97eb 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1938,6 +1938,8 @@ struct drm_i915_private { > > bool edp_low_vswing; > > + struct work_struct init_hw_late; > + > /* > * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch > * will be rejected. Instead look for a better place. > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 2f0fed1b9dd7..7efa71f8edd7 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -5140,12 +5140,76 @@ cleanup_render_ring: > return ret; > } > > +static int > +i915_gem_init_hw_late(struct drm_i915_private *dev_priv) > +{ > + struct intel_engine_cs *ring; > + int i, j; > + > + for_each_ring(ring, dev_priv, i) { > + struct drm_i915_gem_request *req; > + int ret; > + > + if (WARN_ON(!ring->default_context)) { > + ret = -ENODEV; > + goto err; > + } > + > + req = i915_gem_request_alloc(ring, ring->default_context); > + if (IS_ERR(req)) { > + ret = PTR_ERR(req); > + goto err; > + } > + > + if (ring->id == RCS) { > + for (j = 0; j < NUM_L3_SLICES(dev_priv); j++) > + i915_gem_l3_remap(req, j); > + } > + > + ret = i915_ppgtt_init_ring(req); > + if (ret) { > + DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret); > + goto err_req; > + } > + > + ret = i915_gem_context_enable(req); > + if (ret) { > + DRM_ERROR("Context enable ring #%d failed %d\n", i, ret); > + goto err_req; > + } > + > + i915_add_request_no_flush(req); > + continue; > + > +err_req: > + i915_gem_request_cancel(req); > +err: > + return ret; > + } > + > + return 0; > +} > + > +static void > +i915_gem_init_hw_worker(struct work_struct *work) > +{ > + struct drm_i915_private *dev_priv = > + container_of(work, typeof(*dev_priv), init_hw_late); > + mutex_lock(&dev_priv->dev->struct_mutex); > + if (i915_gem_init_hw_late(dev_priv)) { > + DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); > + atomic_set_mask(I915_WEDGED, > + &dev_priv->gpu_error.reset_counter); > + } > + mutex_unlock(&dev_priv->dev->struct_mutex); > +} > + > int > i915_gem_init_hw(struct drm_device *dev) > { > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_engine_cs *ring; > - int ret, i, j; > + int ret, i; > > if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) > return -EIO; > @@ -5198,41 +5262,10 @@ i915_gem_init_hw(struct drm_device *dev) > } > > /* Now it is safe to go back round and do everything else: */ > - for_each_ring(ring, dev_priv, i) { > - struct drm_i915_gem_request *req; > - > - WARN_ON(!ring->default_context); > - > - req = i915_gem_request_alloc(ring, ring->default_context); > - if (IS_ERR(req)) { > - ret = PTR_ERR(req); > - i915_gem_cleanup_ringbuffer(dev); > - goto out; > - } > - > - if (ring->id == RCS) { > - for (j = 0; j < NUM_L3_SLICES(dev); j++) > - i915_gem_l3_remap(req, j); > - } > - > - ret = i915_ppgtt_init_ring(req); > - if (ret && ret != -EIO) { > - DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret); > - i915_gem_request_cancel(req); > - i915_gem_cleanup_ringbuffer(dev); > - goto out; > - } > - > - ret = i915_gem_context_enable(req); > - if (ret && ret != -EIO) { > - DRM_ERROR("Context enable ring #%d failed %d\n", i, ret); > - i915_gem_request_cancel(req); > - i915_gem_cleanup_ringbuffer(dev); > - goto out; > - } > - > - i915_add_request_no_flush(req); > - } > + if (dev->open_count == 0) /* uncontested with userspace, i.e. boot */ > + queue_work(dev_priv->wq, &dev_priv->init_hw_late); > + else > + ret = i915_gem_init_hw_late(dev_priv); > > out: > intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); > @@ -5379,6 +5412,7 @@ i915_gem_load(struct drm_device *dev) > init_ring_lists(&dev_priv->ring[i]); > for (i = 0; i < I915_MAX_NUM_FENCES; i++) > INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); > + INIT_WORK(&dev_priv->init_hw_late, i915_gem_init_hw_worker); > INIT_DELAYED_WORK(&dev_priv->mm.retire_work, > i915_gem_retire_work_handler); > INIT_DELAYED_WORK(&dev_priv->mm.idle_work, > @@ -5442,11 +5476,18 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file) > > int i915_gem_open(struct drm_device *dev, struct drm_file *file) > { > + struct drm_i915_private *dev_priv = to_i915(dev); > struct drm_i915_file_private *file_priv; > int ret; > > DRM_DEBUG_DRIVER("\n"); > > + /* Flush ring initialisation before userspace can submit its own > + * batches, so the hardware initialisation commands are queued > + * first. > + */ > + flush_work(&dev_priv->init_hw_late); > + > file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); > if (!file_priv) > return -ENOMEM; > -- > 2.1.4 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx