Re: [PATCH] nouveau: Add irq waiting as alternative to busywait

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Jul 13, 2012 at 11:35 PM, Maarten Lankhorst
<m.b.lankhorst@xxxxxxxxx> wrote:
> A way to trigger an irq will be needed for optimus support since
> cpu-waiting isn't always viable there. This could also be nice for
> power saving, since the cpu would no longer have to spin, and
> performance might improve slightly on cpu-limited workloads.
>
> Some way to quantify these effects would be nice, even if the
> end result would be 'no performance regression'. An earlier
> version always emitted an interrupt, resulting in glxgears going
> from 8k fps to 7k. However this is no longer the case, as I'm
> using the kernel submission channel for generating irqs as
> needed now.
>
> On nv84 I'm using NOTIFY_INTR, but that might have been
> removed on fermi, so instead I'm using invalid command
> 0x0058 now as a way to signal completion.

Out of curiosity, isn't this like a handcoded version of software
methods? If so, why handcoded? Or are software methods not supported
on NVC0?

>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxx>
>
> ---
>  drivers/gpu/drm/nouveau/nouveau_drv.h   |    2 +
>  drivers/gpu/drm/nouveau/nouveau_fence.c |   49 ++++++++++++++++++++++++++++---
>  drivers/gpu/drm/nouveau/nouveau_fifo.h  |    1 +
>  drivers/gpu/drm/nouveau/nouveau_state.c |    1 +
>  drivers/gpu/drm/nouveau/nv04_fifo.c     |   25 ++++++++++++++++
>  drivers/gpu/drm/nouveau/nv84_fence.c    |   18 +++++++++--
>  drivers/gpu/drm/nouveau/nvc0_fence.c    |   12 ++++++--
>  drivers/gpu/drm/nouveau/nvc0_fifo.c     |    3 +-
>  drivers/gpu/drm/nouveau/nve0_fifo.c     |   15 +++++++--
>  9 files changed, 110 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
> index f97a1a7..d9d274d 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drv.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
> @@ -707,6 +707,7 @@ struct drm_nouveau_private {
>                 struct drm_mm heap;
>                 struct nouveau_bo *bo;
>         } fence;
> +       wait_queue_head_t fence_wq;
>
>         struct {
>                 spinlock_t lock;
> @@ -1656,6 +1657,7 @@ nv44_graph_class(struct drm_device *dev)
>  #define NV84_SUBCHAN_WRCACHE_FLUSH                                   0x00000024
>  #define NV10_SUBCHAN_REF_CNT                                         0x00000050
>  #define NVSW_SUBCHAN_PAGE_FLIP                                       0x00000054
> +#define NVSW_SUBCHAN_FENCE_WAKE                                      0x00000058
>  #define NV11_SUBCHAN_DMA_SEMAPHORE                                   0x00000060
>  #define NV11_SUBCHAN_SEMAPHORE_OFFSET                                0x00000064
>  #define NV11_SUBCHAN_SEMAPHORE_ACQUIRE                               0x00000068
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
> index 3c18049..3ba8dee 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
> @@ -68,7 +68,7 @@ nouveau_fence_update(struct nouveau_channel *chan)
>
>         spin_lock(&fctx->lock);
>         list_for_each_entry_safe(fence, fnext, &fctx->pending, head) {
> -               if (priv->read(chan) < fence->sequence)
> +               if (priv->read(chan) - fence->sequence >= 0x80000000U)
>                         break;
>
>                 if (fence->work)
> @@ -111,11 +111,9 @@ nouveau_fence_done(struct nouveau_fence *fence)
>         return !fence->channel;
>  }
>
> -int
> -nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
> +static int nouveau_fence_wait_busy(struct nouveau_fence *fence, bool lazy, bool intr)
>  {
>         unsigned long sleep_time = NSEC_PER_MSEC / 1000;
> -       ktime_t t;
>         int ret = 0;
>
>         while (!nouveau_fence_done(fence)) {
> @@ -127,7 +125,7 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
>                 __set_current_state(intr ? TASK_INTERRUPTIBLE :
>                                            TASK_UNINTERRUPTIBLE);
>                 if (lazy) {
> -                       t = ktime_set(0, sleep_time);
> +                       ktime_t t = ktime_set(0, sleep_time);
>                         schedule_hrtimeout(&t, HRTIMER_MODE_REL);
>                         sleep_time *= 2;
>                         if (sleep_time > NSEC_PER_MSEC)
> @@ -144,6 +142,47 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
>         return ret;
>  }
>
> +static int nouveau_fence_wait_event(struct nouveau_fence *fence, bool intr)
> +{
> +       struct drm_nouveau_private *dev_priv = fence->channel->dev->dev_private;
> +       unsigned long timeout = fence->timeout;
> +       int ret = 0;
> +       struct nouveau_channel *chan = dev_priv->channel;
> +       struct nouveau_channel *prev = fence->channel;
> +       struct nouveau_fence_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_FENCE);
> +
> +       if (nouveau_fence_done(fence))
> +               return 0;
> +
> +       if (!timeout)
> +               timeout = jiffies + 3 * DRM_HZ;
> +
> +       if (prev != chan)
> +               ret = priv->sync(fence, prev, chan);
> +       if (ret)
> +               goto busy;
> +
> +       if (intr)
> +               ret = wait_event_interruptible_timeout(dev_priv->fence_wq, nouveau_fence_done(fence), timeout);
> +       else
> +               ret = wait_event_timeout(dev_priv->fence_wq, nouveau_fence_done(fence), timeout);
> +
> +       return ret;
> +
> +busy:
> +       return nouveau_fence_wait_busy(fence, true, intr);
> +}
> +
> +int
> +nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
> +{
> +       struct drm_nouveau_private *dev_priv = fence->channel->dev->dev_private;
> +       if (dev_priv->chipset >= 0x84 && dev_priv->channel && lazy)
> +               return nouveau_fence_wait_event(fence, intr);
> +       else
> +               return nouveau_fence_wait_busy(fence, lazy, intr);
> +}
> +
>  int
>  nouveau_fence_sync(struct nouveau_fence *fence, struct nouveau_channel *chan)
>  {
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fifo.h b/drivers/gpu/drm/nouveau/nouveau_fifo.h
> index ce99cab..942e211 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fifo.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_fifo.h
> @@ -15,6 +15,7 @@ int  nv04_fifo_fini(struct drm_device *, int, bool);
>  int  nv04_fifo_init(struct drm_device *, int);
>  void nv04_fifo_isr(struct drm_device *);
>  void nv04_fifo_destroy(struct drm_device *, int);
> +bool nouveau_fifo_wakeup(struct drm_device *dev, u32 chid);
>
>  void nv50_fifo_playlist_update(struct drm_device *);
>  void nv50_fifo_destroy(struct drm_device *, int);
> diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
> index 19706f0..6cdfacb 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_state.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_state.c
> @@ -515,6 +515,7 @@ nouveau_card_channel_init(struct drm_device *dev)
>         dev_priv->channel = chan;
>         if (ret)
>                 return ret;
> +       init_waitqueue_head(&dev_priv->fence_wq);
>         mutex_unlock(&dev_priv->channel->mutex);
>
>         nouveau_bo_move_init(chan);
> diff --git a/drivers/gpu/drm/nouveau/nv04_fifo.c b/drivers/gpu/drm/nouveau/nv04_fifo.c
> index a6295cd..1665a82 100644
> --- a/drivers/gpu/drm/nouveau/nv04_fifo.c
> +++ b/drivers/gpu/drm/nouveau/nv04_fifo.c
> @@ -307,6 +307,26 @@ out:
>         return handled;
>  }
>
> +bool nouveau_fifo_wakeup(struct drm_device *dev, u32 chid)
> +{
> +       struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO);
> +       struct drm_nouveau_private *dev_priv = dev->dev_private;
> +       struct nouveau_channel *chan = NULL;
> +       bool handled = false;
> +       unsigned long flags;
> +
> +       spin_lock_irqsave(&dev_priv->channels.lock, flags);
> +       if (likely(chid >= 0 && chid < pfifo->channels)) {
> +               chan = dev_priv->channel;
> +               if (chan->id == chid) {
> +                       wake_up_all(&dev_priv->fence_wq);
> +                       handled = true;
> +               }
> +       }
> +       spin_unlock_irqrestore(&dev_priv->channels.lock, flags);
> +       return handled;
> +}
> +
>  static const char *nv_dma_state_err(u32 state)
>  {
>         static const char * const desc[] = {
> @@ -448,6 +468,11 @@ nv04_fifo_isr(struct drm_device *dev)
>                                 status &= ~0x00000010;
>                                 nv_wr32(dev, 0x002100, 0x00000010);
>                         }
> +                       if (status & 0x80000000) {
> +                               nouveau_fifo_wakeup(dev, chid);
> +                               status &= ~0x80000000;
> +                               nv_wr32(dev, 0x002100, 0x80000000);
> +                       }
>                 }
>
>                 if (status) {
> diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c
> index c2f889b..2b8c3e5 100644
> --- a/drivers/gpu/drm/nouveau/nv84_fence.c
> +++ b/drivers/gpu/drm/nouveau/nv84_fence.c
> @@ -42,15 +42,20 @@ static int
>  nv84_fence_emit(struct nouveau_fence *fence)
>  {
>         struct nouveau_channel *chan = fence->channel;
> -       int ret = RING_SPACE(chan, 7);
> +       struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
> +       bool intr = dev_priv->channel == chan;
> +
> +       int ret = RING_SPACE(chan, 7 + intr);
>         if (ret == 0) {
>                 BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
>                 OUT_RING  (chan, NvSema);
> -               BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
> +               BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4 + intr);
>                 OUT_RING  (chan, upper_32_bits(chan->id * 16));
>                 OUT_RING  (chan, lower_32_bits(chan->id * 16));
>                 OUT_RING  (chan, fence->sequence);
>                 OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG);
> +               if (intr)
> +                       OUT_RING  (chan, 0);
>                 FIRE_RING (chan);
>         }
>         return ret;
> @@ -61,15 +66,20 @@ static int
>  nv84_fence_sync(struct nouveau_fence *fence,
>                 struct nouveau_channel *prev, struct nouveau_channel *chan)
>  {
> -       int ret = RING_SPACE(chan, 7);
> +       struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
> +       bool intr = dev_priv->channel == chan;
> +
> +       int ret = RING_SPACE(chan, 7 + intr);
>         if (ret == 0) {
>                 BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
>                 OUT_RING  (chan, NvSema);
> -               BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
> +               BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4 + intr);
>                 OUT_RING  (chan, upper_32_bits(prev->id * 16));
>                 OUT_RING  (chan, lower_32_bits(prev->id * 16));
>                 OUT_RING  (chan, fence->sequence);
>                 OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL);
> +               if (intr)
> +                       OUT_RING  (chan, 0);
>                 FIRE_RING (chan);
>         }
>         return ret;
> diff --git a/drivers/gpu/drm/nouveau/nvc0_fence.c b/drivers/gpu/drm/nouveau/nvc0_fence.c
> index 47ab388..25805ef 100644
> --- a/drivers/gpu/drm/nouveau/nvc0_fence.c
> +++ b/drivers/gpu/drm/nouveau/nvc0_fence.c
> @@ -45,15 +45,19 @@ nvc0_fence_emit(struct nouveau_fence *fence)
>         struct nouveau_channel *chan = fence->channel;
>         struct nvc0_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
>         u64 addr = fctx->vma.offset + chan->id * 16;
> +       struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
> +       bool intr = dev_priv->channel == chan;
>         int ret;
>
> -       ret = RING_SPACE(chan, 5);
> +       ret = RING_SPACE(chan, 5 + intr);
>         if (ret == 0) {
>                 BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
>                 OUT_RING  (chan, upper_32_bits(addr));
>                 OUT_RING  (chan, lower_32_bits(addr));
>                 OUT_RING  (chan, fence->sequence);
>                 OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG);
> +               if (intr)
> +                       BEGIN_IMC0(chan, 0, 0x058, 0);
>                 FIRE_RING (chan);
>         }
>
> @@ -64,11 +68,13 @@ static int
>  nvc0_fence_sync(struct nouveau_fence *fence,
>                 struct nouveau_channel *prev, struct nouveau_channel *chan)
>  {
> +       struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
>         struct nvc0_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE];
>         u64 addr = fctx->vma.offset + prev->id * 16;
> +       bool intr = dev_priv->channel == chan;
>         int ret;
>
> -       ret = RING_SPACE(chan, 5);
> +       ret = RING_SPACE(chan, 5 + intr);
>         if (ret == 0) {
>                 BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
>                 OUT_RING  (chan, upper_32_bits(addr));
> @@ -76,6 +82,8 @@ nvc0_fence_sync(struct nouveau_fence *fence,
>                 OUT_RING  (chan, fence->sequence);
>                 OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL |
>                                  NVC0_SUBCHAN_SEMAPHORE_TRIGGER_YIELD);
> +               if (intr)
> +                       BEGIN_IMC0(chan, 0, NVSW_SUBCHAN_FENCE_WAKE, 0);
>                 FIRE_RING (chan);
>         }
>
> diff --git a/drivers/gpu/drm/nouveau/nvc0_fifo.c b/drivers/gpu/drm/nouveau/nvc0_fifo.c
> index 7d85553..b3930c7 100644
> --- a/drivers/gpu/drm/nouveau/nvc0_fifo.c
> +++ b/drivers/gpu/drm/nouveau/nvc0_fifo.c
> @@ -356,7 +356,8 @@ nvc0_fifo_isr_subfifo_intr(struct drm_device *dev, int unit)
>                 if (mthd == 0x0054) {
>                         if (!nvc0_fifo_page_flip(dev, chid))
>                                 show &= ~0x00200000;
> -               }
> +               } else if (mthd == 0x0058 && nouveau_fifo_wakeup(dev, chid))
> +                       show &= ~0x00200000;
>         }
>
>         if (show) {
> diff --git a/drivers/gpu/drm/nouveau/nve0_fifo.c b/drivers/gpu/drm/nouveau/nve0_fifo.c
> index 1855ecb..33e41cd 100644
> --- a/drivers/gpu/drm/nouveau/nve0_fifo.c
> +++ b/drivers/gpu/drm/nouveau/nve0_fifo.c
> @@ -303,11 +303,18 @@ nve0_fifo_isr_subfifo_intr(struct drm_device *dev, int unit)
>         u32 chid = nv_rd32(dev, 0x040120 + (unit * 0x2000)) & 0x7f;
>         u32 subc = (addr & 0x00070000);
>         u32 mthd = (addr & 0x00003ffc);
> +       u32 show = stat;
>
> -       NV_INFO(dev, "PSUBFIFO %d:", unit);
> -       nouveau_bitfield_print(nve0_fifo_subfifo_intr, stat);
> -       NV_INFO(dev, "PSUBFIFO %d: ch %d subc %d mthd 0x%04x data 0x%08x\n",
> -               unit, chid, subc, mthd, data);
> +       if (stat & 0x00200000 && mthd == 0x0058 &&
> +           nouveau_fifo_wakeup(dev, chid))
> +               show &= ~0x00200000;
> +
> +       if (show) {
> +               NV_INFO(dev, "PSUBFIFO %d:", unit);
> +               nouveau_bitfield_print(nve0_fifo_subfifo_intr, show);
> +               NV_INFO(dev, "PSUBFIFO %d: ch %d subc %d mthd 0x%04x data 0x%08x\n",
> +                       unit, chid, subc, mthd, data);
> +       }
>
>         nv_wr32(dev, 0x0400c0 + (unit * 0x2000), 0x80600008);
>         nv_wr32(dev, 0x040108 + (unit * 0x2000), stat);
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/dri-devel



-- 
Far away from the primal instinct, the song seems to fade away, the
river get wider between your thoughts and the things we do and say.
_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/dri-devel


[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux