2014/1/22 <naresh.kumar.kachhi@xxxxxxxxx>:
> From: Naresh Kumar Kachhi <naresh.kumar.kachhi@xxxxxxxxx>
>
> GPU idleness is tracked by checking the request queue. Whenever the
> request queue is empty, we assume the GPU is idle. When a new set of
> commands is scheduled on a ring, we call i915_add_request to make sure
> these commands are tracked properly. However, there are a few places
> that are currently not tracked.
> This patch introduces a new function, add_request_wo_flush, to track
> such requests. add_request_wo_flush is the same as add_request; the
> only difference is that it does not cause a flush. This avoids any
> extra overhead while adding a new request.
>
> To make sure Gfx is in D0 while there are still commands pending on a
> ring, the following is done:
> - All the ioctls are already covered with get/put; this makes sure Gfx
>   is in D0 at the time commands are scheduled on the GPU.
> - Once command scheduling is done, we call add_request to track ring
>   activity.
> - We call get_noresume if this is the first request (the ioctl is
>   already covered by get_sync).
> - put is called only when the request_list becomes empty, i.e. the GPU
>   is idle and there are no pending commands on the rings.
>
> Note: make sure we don't do multiple add_request calls in the same
> ioctl/callback; only one at the end is enough.

The PC8 code already has an infrastructure to track GPU idleness, and I
already submitted a patch to move that code to dev_priv->pm.gpu_idle.
Please see this patch: http://patchwork.freedesktop.org/patch/16952/ .
I suggest you try to reuse this infrastructure, and if you find any
problems, please add test cases to pm_pc8.c that reproduce them.
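To make the scheme easier to follow, here is a minimal, standalone sketch
of the edge-triggered busy/idle reference counting described in the commit
message above. The names (fake_pm, pm_get_noresume, pm_put, gpu_busy,
gpu_idle) are illustrative stand-ins, not the real i915 API: only the
idle->busy transition takes a no-resume reference, and only the busy->idle
transition (request list drained) drops it, so repeated add_request calls
within one busy period never take more than one reference.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for dev_priv->pm; not the real i915 structure. */
struct fake_pm {
        bool gpu_idle;  /* true when the request list is empty */
        int refcount;   /* runtime-PM reference count */
};

static void pm_get_noresume(struct fake_pm *pm) { pm->refcount++; }
static void pm_put(struct fake_pm *pm)          { pm->refcount--; }

/* Called whenever a request is added: only the idle->busy edge
 * takes a reference, so repeated calls are harmless. */
static void gpu_busy(struct fake_pm *pm)
{
        if (pm->gpu_idle) {
                pm->gpu_idle = false;
                pm_get_noresume(pm);    /* keep the device powered */
        }
}

/* Called when the request list drains: the busy->idle edge drops
 * the reference taken above. */
static void gpu_idle(struct fake_pm *pm)
{
        if (!pm->gpu_idle) {
                pm->gpu_idle = true;
                pm_put(pm);             /* allow runtime suspend again */
        }
}

int main(void)
{
        struct fake_pm pm = { .gpu_idle = true, .refcount = 0 };

        gpu_busy(&pm);  /* first request: takes one reference */
        gpu_busy(&pm);  /* further requests: no extra reference */
        printf("after requests queued: refcount = %d\n", pm.refcount);  /* 1 */

        gpu_idle(&pm);  /* request list empty: drop the reference */
        printf("after request list drained: refcount = %d\n", pm.refcount);  /* 0 */
        return 0;
}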
> Signed-off-by: Naresh Kumar Kachhi <naresh.kumar.kachhi@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_dma.c            |  5 ++++
>  drivers/gpu/drm/i915/i915_drv.h            | 10 +++++--
>  drivers/gpu/drm/i915/i915_gem.c            | 27 ++++++++++++-----
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  4 +--
>  drivers/gpu/drm/i915/intel_display.c       |  1 +
>  drivers/gpu/drm/i915/intel_drv.h           |  3 ++
>  drivers/gpu/drm/i915/intel_pm.c            | 47 ++++++++++++++++++++++++++++++
>  7 files changed, 84 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index ee9502b..b5af745 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -454,6 +454,7 @@ static int i915_dispatch_cmdbuffer(struct drm_device * dev,
>                                     struct drm_clip_rect *cliprects,
>                                     void *cmdbuf)
>  {
> +        struct drm_i915_private *dev_priv = dev->dev_private;
>          int nbox = cmd->num_cliprects;
>          int i = 0, count, ret;
>
> @@ -480,6 +481,7 @@ static int i915_dispatch_cmdbuffer(struct drm_device * dev,
>          }
>
>          i915_emit_breadcrumb(dev);
> +        i915_add_request_wo_flush(LP_RING(dev_priv));
>          return 0;
>  }
>
> @@ -542,6 +544,7 @@ static int i915_dispatch_batchbuffer(struct drm_device * dev,
>          }
>
>          i915_emit_breadcrumb(dev);
> +        i915_add_request_wo_flush(LP_RING(dev_priv));
>          return 0;
>  }
>
> @@ -595,6 +598,7 @@ static int i915_dispatch_flip(struct drm_device * dev)
>                  ADVANCE_LP_RING();
>          }
>
> +        i915_add_request_wo_flush(LP_RING(dev_priv));
>          master_priv->sarea_priv->pf_current_page = dev_priv->dri1.current_page;
>          return 0;
>  }
> @@ -768,6 +772,7 @@ static int i915_emit_irq(struct drm_device * dev)
>                  OUT_RING(dev_priv->dri1.counter);
>                  OUT_RING(MI_USER_INTERRUPT);
>                  ADVANCE_LP_RING();
> +                i915_add_request_wo_flush(LP_RING(dev_priv));
>          }
>
>          return dev_priv->dri1.counter;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 56c720b..d1399f9 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1324,6 +1324,7 @@ struct i915_package_c8 {
>
>  struct i915_runtime_pm {
>          bool suspended;
> +        bool gpu_idle;
>  };
>
>  enum intel_pipe_crc_source {
> @@ -2063,7 +2064,7 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>
>  int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
>  int i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -                         struct intel_ring_buffer *to);
> +                         struct intel_ring_buffer *to, bool add_request);
>  void i915_vma_move_to_active(struct i915_vma *vma,
>                               struct intel_ring_buffer *ring);
>  int i915_gem_dumb_create(struct drm_file *file_priv,
> @@ -2139,9 +2140,12 @@ int __must_check i915_gem_suspend(struct drm_device *dev);
>  int __i915_add_request(struct intel_ring_buffer *ring,
>                         struct drm_file *file,
>                         struct drm_i915_gem_object *batch_obj,
> -                       u32 *seqno);
> +                       u32 *seqno,
> +                       bool flush_caches);
>  #define i915_add_request(ring, seqno) \
> -        __i915_add_request(ring, NULL, NULL, seqno)
> +        __i915_add_request(ring, NULL, NULL, seqno, true)
> +#define i915_add_request_wo_flush(ring) \
> +        __i915_add_request(ring, NULL, NULL, NULL, false)
>  int __must_check i915_wait_seqno(struct intel_ring_buffer *ring,
>                                   uint32_t seqno);
>  int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 024e454..3e8202e 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2136,7 +2136,8 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
>  int __i915_add_request(struct intel_ring_buffer *ring,
>                         struct drm_file *file,
>                         struct drm_i915_gem_object *obj,
> -                       u32 *out_seqno)
> +                       u32 *out_seqno,
> +                       bool flush_caches)
>  {
>          drm_i915_private_t *dev_priv = ring->dev->dev_private;
>          struct drm_i915_gem_request *request;
> @@ -2152,9 +2153,11 @@ int __i915_add_request(struct intel_ring_buffer *ring,
>           * is that the flush _must_ happen before the next request, no matter
>           * what.
>           */
> -        ret = intel_ring_flush_all_caches(ring);
> -        if (ret)
> -                return ret;
> +        if (flush_caches) {
> +                ret = intel_ring_flush_all_caches(ring);
> +                if (ret)
> +                        return ret;
> +        }
>
>          request = ring->preallocated_lazy_request;
>          if (WARN_ON(request == NULL))
> @@ -2219,6 +2222,7 @@ int __i915_add_request(struct intel_ring_buffer *ring,
>                                     &dev_priv->mm.retire_work,
>                                     round_jiffies_up_relative(HZ));
>                  intel_mark_busy(dev_priv->dev);
> +                intel_runtime_pm_gpu_busy(dev_priv);
>          }
>  }
>
> @@ -2544,10 +2548,12 @@ i915_gem_retire_requests(struct drm_device *dev)
>                  idle &= list_empty(&ring->request_list);
>          }
>
> -        if (idle)
> +        if (idle) {
>                  mod_delayed_work(dev_priv->wq,
>                                   &dev_priv->mm.idle_work,
>                                   msecs_to_jiffies(100));
> +                intel_runtime_pm_gpu_idle(dev_priv);
> +        }
>
>          return idle;
>  }
> @@ -2691,6 +2697,8 @@ out:
>   *
>   * @obj: object which may be in use on another ring.
>   * @to: ring we wish to use the object on. May be NULL.
> + * @add_request: do we need to add a request to track operations
> + * submitted on ring with sync_to function
>   *
>   * This code is meant to abstract object synchronization with the GPU.
>   * Calling with NULL implies synchronizing the object with the CPU
> @@ -2700,7 +2708,7 @@ out:
>   */
>  int
>  i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -                     struct intel_ring_buffer *to)
> +                     struct intel_ring_buffer *to, bool add_request)
>  {
>          struct intel_ring_buffer *from = obj->ring;
>          u32 seqno;
> @@ -2724,12 +2732,15 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
>
>          trace_i915_gem_ring_sync_to(from, to, seqno);
>          ret = to->sync_to(to, from, seqno);
> -        if (!ret)
> +        if (!ret) {
>                  /* We use last_read_seqno because sync_to()
>                   * might have just caused seqno wrap under
>                   * the radar.
>                   */
>                  from->sync_seqno[idx] = obj->last_read_seqno;
> +                if (add_request)
> +                        i915_add_request_wo_flush(to);
> +        }
>
>          return ret;
>  }
> @@ -3707,7 +3718,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>          int ret;
>
>          if (pipelined != obj->ring) {
> -                ret = i915_gem_object_sync(obj, pipelined);
> +                ret = i915_gem_object_sync(obj, pipelined, true);
>                  if (ret)
>                          return ret;
>          }
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0c6bcff..bda7a06 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -832,7 +832,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
>
>          list_for_each_entry(vma, vmas, exec_list) {
>                  struct drm_i915_gem_object *obj = vma->obj;
> -                ret = i915_gem_object_sync(obj, ring);
> +                ret = i915_gem_object_sync(obj, ring, false);
>                  if (ret)
>                          return ret;
>
> @@ -969,7 +969,7 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
>          ring->gpu_caches_dirty = true;
>
>          /* Add a breadcrumb for the completion of the batch buffer */
> -        (void)__i915_add_request(ring, file, obj, NULL);
> +        (void)__i915_add_request(ring, file, obj, NULL, true);
>  }
>
>  static int
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index ec96002..25eae03 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -8624,6 +8624,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>
>          intel_mark_page_flip_active(intel_crtc);
>          __intel_ring_advance(ring);
> +        i915_add_request_wo_flush(ring);
>          return 0;
>
>  err_unpin:
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 7b3c209..9061aa7 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -881,9 +881,12 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv);
>  void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv);
>  void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv);
>  void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
> +void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
>  void intel_runtime_pm_put(struct drm_i915_private *dev_priv);
>  void intel_init_runtime_pm(struct drm_i915_private *dev_priv);
>  void intel_fini_runtime_pm(struct drm_i915_private *dev_priv);
> +void intel_runtime_pm_gpu_busy(struct drm_i915_private *dev_priv);
> +void intel_runtime_pm_gpu_idle(struct drm_i915_private *dev_priv);
>  void ilk_wm_get_hw_state(struct drm_device *dev);
>
>
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index b9b4fe4..991ff62 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5470,6 +5470,37 @@ void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv)
>          hsw_enable_package_c8(dev_priv);
>  }
>
> +void intel_runtime_pm_gpu_idle(struct drm_i915_private *dev_priv)
> +{
> +        if (!HAS_RUNTIME_PM(dev_priv->dev))
> +                return;
> +
> +        /* don't need a separate mutex here as callers are
> +         * already under struct_mutex
> +         */
> +        WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
> +        if (!dev_priv->pm.gpu_idle) {
> +                dev_priv->pm.gpu_idle = true;
> +                /* match with get in gpu_busy */
> +                intel_runtime_pm_put(dev_priv);
> +        }
> +}
> +
> +void intel_runtime_pm_gpu_busy(struct drm_i915_private *dev_priv)
> +{
> +        if (!HAS_RUNTIME_PM(dev_priv->dev))
> +                return;
> +
> +        WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
> +        if (dev_priv->pm.gpu_idle) {
> +                dev_priv->pm.gpu_idle = false;
> +                /* make sure that we keep the GPU on until the request
> +                 * list is empty
> +                 */
> +                intel_runtime_pm_get_noresume(dev_priv);
> +        }
> +}
> +
>  void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
>  {
>          struct drm_device *dev = dev_priv->dev;
> @@ -5482,6 +5513,21 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
>          WARN(dev_priv->pm.suspended, "Device still suspended.\n");
>  }
>
> +void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv)
> +{
> +        struct drm_device *dev = dev_priv->dev;
> +        struct device *device = &dev->pdev->dev;
> +
> +        if (!HAS_RUNTIME_PM(dev))
> +                return;
> +
> +        /* the driver calls get_noresume when it is sure that the device
> +         * is already active and it just wants to increment the ref count
> +         */
> +        WARN(dev_priv->pm.suspended, "Device suspended. call get_sync?\n");
> +        pm_runtime_get_noresume(device);
> +}
> +
>  void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
>  {
>          struct drm_device *dev = dev_priv->dev;
> @@ -5500,6 +5546,7 @@ void intel_init_runtime_pm(struct drm_i915_private *dev_priv)
>          struct device *device = &dev->pdev->dev;
>
>          dev_priv->pm.suspended = false;
> +        dev_priv->pm.gpu_idle = true;
>
>          if (!HAS_RUNTIME_PM(dev))
>                  return;
> --
> 1.8.1.2

-- 
Paulo Zanoni
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx