2014/1/22 <naresh.kumar.kachhi@xxxxxxxxx>:
> From: Naresh Kumar Kachhi <naresh.kumar.kachhi@xxxxxxxxx>
>
> GPU idleness is tracked by checking the request queue. Whenever the
> request queue is empty, we assume the GPU is idle. When a new set of
> commands is scheduled on a ring, we call i915_add_request to make sure
> these commands are tracked properly. However, there are a few places
> that are currently not tracked.
> This patch introduces a new function, add_request_wo_flush, to track
> such requests. add_request_wo_flush is the same as add_request; the
> only difference is that it does not cause a flush. This avoids any
> extra overhead while adding a new request.
>
> To make sure Gfx is in D0 while there are still commands pending on a
> ring, the following is done:
> - All the ioctls are already covered with get/put; this makes sure Gfx
>   is in D0 at the time commands are scheduled on the GPU.
> - Once command scheduling is done, we call add_request to track ring
>   activity.
> - We call get_noresume if this is the first request (the ioctl is
>   already covered by get_sync).
> - put is called only when the request_list becomes empty, i.e. the GPU
>   is idle and there are no pending commands on the rings.
>
> Note: make sure we don't do multiple add_request calls in the same
> ioctl/callback; only one at the end is enough.

The PC8 code already has an infrastructure to track GPU idleness, and I
already submitted a patch to move that code to dev_priv->pm.gpu_idle.
Please see this patch: http://patchwork.freedesktop.org/patch/16952/ .
I suggest you try to reuse this infrastructure, and if you find any
problems, please add test cases to pm_pc8.c that reproduce them.
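To make the scheme easier to follow, here is a minimal, standalone sketch
of the edge-triggered busy/idle reference counting described in the commit
message above. The names (fake_pm, pm_get_noresume, pm_put, gpu_busy,
gpu_idle) are illustrative stand-ins, not the real i915 API: only the
idle->busy transition takes a no-resume reference, and only the busy->idle
transition (request list drained) drops it, so repeated add_request calls
within one busy period never take more than one reference.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for dev_priv->pm; not the real i915 structure. */
struct fake_pm {
        bool gpu_idle;  /* true when the request list is empty */
        int refcount;   /* runtime-PM reference count */
};

static void pm_get_noresume(struct fake_pm *pm) { pm->refcount++; }
static void pm_put(struct fake_pm *pm)          { pm->refcount--; }

/* Called whenever a request is added: only the idle->busy edge
 * takes a reference, so repeated calls are harmless. */
static void gpu_busy(struct fake_pm *pm)
{
        if (pm->gpu_idle) {
                pm->gpu_idle = false;
                pm_get_noresume(pm);    /* keep the device powered */
        }
}

/* Called when the request list drains: the busy->idle edge drops
 * the reference taken above. */
static void gpu_idle(struct fake_pm *pm)
{
        if (!pm->gpu_idle) {
                pm->gpu_idle = true;
                pm_put(pm);             /* allow runtime suspend again */
        }
}

int main(void)
{
        struct fake_pm pm = { .gpu_idle = true, .refcount = 0 };

        gpu_busy(&pm);  /* first request: takes one reference */
        gpu_busy(&pm);  /* further requests: no extra reference */
        printf("after requests queued: refcount = %d\n", pm.refcount);  /* 1 */

        gpu_idle(&pm);  /* request list empty: drop the reference */
        printf("after request list drained: refcount = %d\n", pm.refcount);  /* 0 */
        return 0;
}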
> Signed-off-by: Naresh Kumar Kachhi <naresh.kumar.kachhi@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_dma.c            |  5 ++++
>  drivers/gpu/drm/i915/i915_drv.h            | 10 +++++--
>  drivers/gpu/drm/i915/i915_gem.c            | 27 ++++++++++++-----
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  4 +--
>  drivers/gpu/drm/i915/intel_display.c       |  1 +
>  drivers/gpu/drm/i915/intel_drv.h           |  3 ++
>  drivers/gpu/drm/i915/intel_pm.c            | 47 ++++++++++++++++++++++++++++++
>  7 files changed, 84 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index ee9502b..b5af745 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -454,6 +454,7 @@ static int i915_dispatch_cmdbuffer(struct drm_device * dev,
>                                     struct drm_clip_rect *cliprects,
>                                     void *cmdbuf)
>  {
> +        struct drm_i915_private *dev_priv = dev->dev_private;
>          int nbox = cmd->num_cliprects;
>          int i = 0, count, ret;
>
> @@ -480,6 +481,7 @@ static int i915_dispatch_cmdbuffer(struct drm_device * dev,
>          }
>
>          i915_emit_breadcrumb(dev);
> +        i915_add_request_wo_flush(LP_RING(dev_priv));
>          return 0;
>  }
>
> @@ -542,6 +544,7 @@ static int i915_dispatch_batchbuffer(struct drm_device * dev,
>          }
>
>          i915_emit_breadcrumb(dev);
> +        i915_add_request_wo_flush(LP_RING(dev_priv));
>          return 0;
>  }
>
> @@ -595,6 +598,7 @@ static int i915_dispatch_flip(struct drm_device * dev)
>                  ADVANCE_LP_RING();
>          }
>
> +        i915_add_request_wo_flush(LP_RING(dev_priv));
>          master_priv->sarea_priv->pf_current_page = dev_priv->dri1.current_page;
>          return 0;
>  }
> @@ -768,6 +772,7 @@ static int i915_emit_irq(struct drm_device * dev)
>                  OUT_RING(dev_priv->dri1.counter);
>                  OUT_RING(MI_USER_INTERRUPT);
>                  ADVANCE_LP_RING();
> +                i915_add_request_wo_flush(LP_RING(dev_priv));
>          }
>
>          return dev_priv->dri1.counter;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 56c720b..d1399f9 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1324,6 +1324,7 @@ struct i915_package_c8 {
>
>  struct i915_runtime_pm {
>          bool suspended;
> +        bool gpu_idle;
>  };
>
>  enum intel_pipe_crc_source {
> @@ -2063,7 +2064,7 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>
>  int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
>  int i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -                         struct intel_ring_buffer *to);
> +                         struct intel_ring_buffer *to, bool add_request);
>  void i915_vma_move_to_active(struct i915_vma *vma,
>                               struct intel_ring_buffer *ring);
>  int i915_gem_dumb_create(struct drm_file *file_priv,
> @@ -2139,9 +2140,12 @@ int __must_check i915_gem_suspend(struct drm_device *dev);
>  int __i915_add_request(struct intel_ring_buffer *ring,
>                         struct drm_file *file,
>                         struct drm_i915_gem_object *batch_obj,
> -                       u32 *seqno);
> +                       u32 *seqno,
> +                       bool flush_caches);
>  #define i915_add_request(ring, seqno) \
> -        __i915_add_request(ring, NULL, NULL, seqno)
> +        __i915_add_request(ring, NULL, NULL, seqno, true)
> +#define i915_add_request_wo_flush(ring) \
> +        __i915_add_request(ring, NULL, NULL, NULL, false)
>  int __must_check i915_wait_seqno(struct intel_ring_buffer *ring,
>                                   uint32_t seqno);
>  int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 024e454..3e8202e 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2136,7 +2136,8 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
>  int __i915_add_request(struct intel_ring_buffer *ring,
>                         struct drm_file *file,
>                         struct drm_i915_gem_object *obj,
> -                       u32 *out_seqno)
> +                       u32 *out_seqno,
> +                       bool flush_caches)
>  {
>          drm_i915_private_t *dev_priv = ring->dev->dev_private;
>          struct drm_i915_gem_request *request;
> @@ -2152,9 +2153,11 @@ int __i915_add_request(struct intel_ring_buffer *ring,
>           * is that the flush _must_ happen before the next request, no matter
>           * what.
>           */
> -        ret = intel_ring_flush_all_caches(ring);
> -        if (ret)
> -                return ret;
> +        if (flush_caches) {
> +                ret = intel_ring_flush_all_caches(ring);
> +                if (ret)
> +                        return ret;
> +        }
>
>          request = ring->preallocated_lazy_request;
>          if (WARN_ON(request == NULL))
> @@ -2219,6 +2222,7 @@ int __i915_add_request(struct intel_ring_buffer *ring,
>                                     &dev_priv->mm.retire_work,
>                                     round_jiffies_up_relative(HZ));
>                  intel_mark_busy(dev_priv->dev);
> +                intel_runtime_pm_gpu_busy(dev_priv);
>          }
>  }
>
> @@ -2544,10 +2548,12 @@ i915_gem_retire_requests(struct drm_device *dev)
>                  idle &= list_empty(&ring->request_list);
>          }
>
> -        if (idle)
> +        if (idle) {
>                  mod_delayed_work(dev_priv->wq,
>                                   &dev_priv->mm.idle_work,
>                                   msecs_to_jiffies(100));
> +                intel_runtime_pm_gpu_idle(dev_priv);
> +        }
>
>          return idle;
>  }
> @@ -2691,6 +2697,8 @@ out:
>   *
>   * @obj: object which may be in use on another ring.
>   * @to: ring we wish to use the object on. May be NULL.
> + * @add_request: do we need to add a request to track operations
> + * submitted on ring with sync_to function
>   *
>   * This code is meant to abstract object synchronization with the GPU.
>   * Calling with NULL implies synchronizing the object with the CPU
> @@ -2700,7 +2708,7 @@ out:
>   */
>  int
>  i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -                     struct intel_ring_buffer *to)
> +                     struct intel_ring_buffer *to, bool add_request)
>  {
>          struct intel_ring_buffer *from = obj->ring;
>          u32 seqno;
> @@ -2724,12 +2732,15 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
>
>          trace_i915_gem_ring_sync_to(from, to, seqno);
>          ret = to->sync_to(to, from, seqno);
> -        if (!ret)
> +        if (!ret) {
>                  /* We use last_read_seqno because sync_to()
>                   * might have just caused seqno wrap under
>                   * the radar.
>                   */
>                  from->sync_seqno[idx] = obj->last_read_seqno;
> +                if (add_request)
> +                        i915_add_request_wo_flush(to);
> +        }
>
>          return ret;
>  }
> @@ -3707,7 +3718,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>          int ret;
>
>          if (pipelined != obj->ring) {
> -                ret = i915_gem_object_sync(obj, pipelined);
> +                ret = i915_gem_object_sync(obj, pipelined, true);
>                  if (ret)
>                          return ret;
>          }
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0c6bcff..bda7a06 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -832,7 +832,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
>
>          list_for_each_entry(vma, vmas, exec_list) {
>                  struct drm_i915_gem_object *obj = vma->obj;
> -                ret = i915_gem_object_sync(obj, ring);
> +                ret = i915_gem_object_sync(obj, ring, false);
>                  if (ret)
>                          return ret;
>
> @@ -969,7 +969,7 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
>          ring->gpu_caches_dirty = true;
>
>          /* Add a breadcrumb for the completion of the batch buffer */
> -        (void)__i915_add_request(ring, file, obj, NULL);
> +        (void)__i915_add_request(ring, file, obj, NULL, true);
>  }
>
>  static int
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index ec96002..25eae03 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -8624,6 +8624,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>
>          intel_mark_page_flip_active(intel_crtc);
>          __intel_ring_advance(ring);
> +        i915_add_request_wo_flush(ring);
>          return 0;
>
>  err_unpin:
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 7b3c209..9061aa7 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -881,9 +881,12 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv);
>  void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv);
>  void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv);
>  void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
> +void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
>  void intel_runtime_pm_put(struct drm_i915_private *dev_priv);
>  void intel_init_runtime_pm(struct drm_i915_private *dev_priv);
>  void intel_fini_runtime_pm(struct drm_i915_private *dev_priv);
> +void intel_runtime_pm_gpu_busy(struct drm_i915_private *dev_priv);
> +void intel_runtime_pm_gpu_idle(struct drm_i915_private *dev_priv);
>  void ilk_wm_get_hw_state(struct drm_device *dev);
>
>
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index b9b4fe4..991ff62 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5470,6 +5470,37 @@ void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv)
>          hsw_enable_package_c8(dev_priv);
>  }
>
> +void intel_runtime_pm_gpu_idle(struct drm_i915_private *dev_priv)
> +{
> +        if (!HAS_RUNTIME_PM(dev_priv->dev))
> +                return;
> +
> +        /* don't need a separate mutex here as callers are
> +         * already under struct_mutex
> +         */
> +        WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
> +        if (!dev_priv->pm.gpu_idle) {
> +                dev_priv->pm.gpu_idle = true;
> +                /* match with get in gpu_busy */
> +                intel_runtime_pm_put(dev_priv);
> +        }
> +}
> +
> +void intel_runtime_pm_gpu_busy(struct drm_i915_private *dev_priv)
> +{
> +        if (!HAS_RUNTIME_PM(dev_priv->dev))
> +                return;
> +
> +        WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
> +        if (dev_priv->pm.gpu_idle) {
> +                dev_priv->pm.gpu_idle = false;
> +                /* make sure that we keep the GPU on until the request
> +                 * list is empty
> +                 */
> +                intel_runtime_pm_get_noresume(dev_priv);
> +        }
> +}
> +
>  void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
>  {
>          struct drm_device *dev = dev_priv->dev;
> @@ -5482,6 +5513,21 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
>          WARN(dev_priv->pm.suspended, "Device still suspended.\n");
>  }
>
> +void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv)
> +{
> +        struct drm_device *dev = dev_priv->dev;
> +        struct device *device = &dev->pdev->dev;
> +
> +        if (!HAS_RUNTIME_PM(dev))
> +                return;
> +
> +        /* the driver calls get_noresume when it is sure that the device
> +         * is already active and it just wants to increment the ref count
> +         */
> +        WARN(dev_priv->pm.suspended, "Device suspended. call get_sync?\n");
> +        pm_runtime_get_noresume(device);
> +}
> +
>  void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
>  {
>          struct drm_device *dev = dev_priv->dev;
> @@ -5500,6 +5546,7 @@ void intel_init_runtime_pm(struct drm_i915_private *dev_priv)
>          struct device *device = &dev->pdev->dev;
>
>          dev_priv->pm.suspended = false;
> +        dev_priv->pm.gpu_idle = true;
>
>          if (!HAS_RUNTIME_PM(dev))
>                  return;
> --
> 1.8.1.2

-- 
Paulo Zanoni
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx