Relax the required locking to, instead of disabling irqs, disable tasklets. Allow the caller to perform the locking using utility functions, which allows the caller to combine a number of register accesses within the same critical section. Implement this for capability reads and command buffer submission. Signed-off-by: Thomas Hellstrom <thellstrom@xxxxxxxxxx> Reviewed-by: Sinclair Yeh <syeh@xxxxxxxxxx> --- drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c | 8 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 9 +-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 130 ++++++++++++++++++++++++++++----- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 6 +- drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 40 +++++++--- drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 20 ++--- drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 2 +- 7 files changed, 163 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c index 8a76821..b8d2436 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c @@ -290,18 +290,20 @@ void vmw_cmdbuf_header_free(struct vmw_cmdbuf_header *header) */ static int vmw_cmdbuf_header_submit(struct vmw_cmdbuf_header *header) { - struct vmw_cmdbuf_man *man = header->man; + struct vmw_private *dev_priv = header->man->dev_priv; u32 val; if (sizeof(header->handle) > 4) val = (header->handle >> 32); else val = 0; - vmw_write(man->dev_priv, SVGA_REG_COMMAND_HIGH, val); + vmw_hw_lock(dev_priv, true); + __vmw_write(dev_priv, SVGA_REG_COMMAND_HIGH, val); val = (header->handle & 0xFFFFFFFFULL); val |= header->cb_context & SVGA_CB_CONTEXT_MASK; - vmw_write(man->dev_priv, SVGA_REG_COMMAND_LOW, val); + __vmw_write(dev_priv, SVGA_REG_COMMAND_LOW, val); + vmw_hw_unlock(dev_priv, true); return header->cb_header->status; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index a09cf85..2a5496a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -631,7 +631,6 
@@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) ttm_lock_init(&dev_priv->reservation_sem); spin_lock_init(&dev_priv->hw_lock); spin_lock_init(&dev_priv->waiter_lock); - spin_lock_init(&dev_priv->cap_lock); spin_lock_init(&dev_priv->svga_lock); for (i = vmw_res_context; i < vmw_res_max; ++i) { @@ -854,10 +853,10 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) } if (dev_priv->has_mob) { - spin_lock(&dev_priv->cap_lock); - vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_DX); - dev_priv->has_dx = !!vmw_read(dev_priv, SVGA_REG_DEV_CAP); - spin_unlock(&dev_priv->cap_lock); + vmw_hw_lock(dev_priv, true); + __vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_DX); + dev_priv->has_dx = !!__vmw_read(dev_priv, SVGA_REG_DEV_CAP); + vmw_hw_unlock(dev_priv, true); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index a8ae9df..f7dcbcc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -385,7 +385,6 @@ struct vmw_private { bool has_gmr; bool has_mob; spinlock_t hw_lock; - spinlock_t cap_lock; bool has_dx; /* @@ -546,34 +545,127 @@ static inline struct vmw_master *vmw_master(struct drm_master *master) return (struct vmw_master *) master->driver_priv; } -/* - * The locking here is fine-grained, so that it is performed once - * for every read- and write operation. This is of course costly, but we - * don't perform much register access in the timing critical paths anyway. - * Instead we have the extra benefit of being sure that we don't forget - * the hw lock around register accesses. +/** + * vmw_hw_lock - Perform necessary locking for register access + * + * @dev_priv: Pointer to device private structure + * @lock_bh: Disable bottom halves. Always set to true unless irqs are disabled. 
*/ -static inline void vmw_write(struct vmw_private *dev_priv, - unsigned int offset, uint32_t value) +static inline void vmw_hw_lock(struct vmw_private *dev_priv, + bool lock_bh) { - unsigned long irq_flags; + if (lock_bh) + spin_lock_bh(&dev_priv->hw_lock); + else + spin_lock(&dev_priv->hw_lock); +} - spin_lock_irqsave(&dev_priv->hw_lock, irq_flags); +/** + * vmw_hw_unlock - Perform necessary unlocking after register access + * + * @dev_priv: Pointer to device private structure + * @unlock_bh: Disable bottom halves. Always set to true if it was set to true + * in the corresponding lock. + */ +static inline void vmw_hw_unlock(struct vmw_private *dev_priv, + bool unlock_bh) +{ + if (unlock_bh) + spin_unlock_bh(&dev_priv->hw_lock); + else + spin_unlock(&dev_priv->hw_lock); +} + +/** + * __vmw_write - Write a value to a device register with external locking. + * + * @dev_priv: Pointer to the device private structure + * @offset: Device register offset + * @value: Value to write + * + * Note that the caller must ensure that when calling this function, + * @dev_priv::hw_lock must be held and that we're safe against racing + * against tasklets. + */ +static inline void __vmw_write(struct vmw_private *dev_priv, + unsigned int offset, uint32_t value) +{ +#ifdef CONFIG_LOCKDEP + lockdep_assert_held_once(&dev_priv->hw_lock.rlock); + WARN_ON_ONCE(!(in_softirq() || irqs_disabled())); +#endif outl(offset, dev_priv->io_start + VMWGFX_INDEX_PORT); outl(value, dev_priv->io_start + VMWGFX_VALUE_PORT); - spin_unlock_irqrestore(&dev_priv->hw_lock, irq_flags); } -static inline uint32_t vmw_read(struct vmw_private *dev_priv, - unsigned int offset) +/** + * __vmw_read - Read a value from a device register with external locking. 
+ * + * @dev_priv: Pointer to the device private structure + * @offset: Device register offset + * Return: The value read from @offset. + * + * Note that the caller must ensure that when calling this function, + * @dev_priv::hw_lock is held and that we're safe against racing + * against tasklets. + */ +static inline uint32_t __vmw_read(struct vmw_private *dev_priv, + unsigned int offset) { - unsigned long irq_flags; u32 val; - spin_lock_irqsave(&dev_priv->hw_lock, irq_flags); +#ifdef CONFIG_LOCKDEP + lockdep_assert_held_once(&dev_priv->hw_lock.rlock); + WARN_ON_ONCE(!(in_softirq() || irqs_disabled())); +#endif outl(offset, dev_priv->io_start + VMWGFX_INDEX_PORT); val = inl(dev_priv->io_start + VMWGFX_VALUE_PORT); - spin_unlock_irqrestore(&dev_priv->hw_lock, irq_flags); + + return val; +} + +/** + * vmw_write - Write a value to a device register + * + * @dev_priv: Pointer to the device private structure + * @offset: Device register offset + * @value: Value to write + * + * This function may not be called when irqs are disabled, since + * its call to vmw_hw_unlock will re-enable bottom halves. + */ +static inline void vmw_write(struct vmw_private *dev_priv, + unsigned int offset, uint32_t value) +{ +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(irqs_disabled()); +#endif + vmw_hw_lock(dev_priv, true); + __vmw_write(dev_priv, offset, value); + vmw_hw_unlock(dev_priv, true); +} + +/** + * vmw_read - Read a value from a device register + * + * @dev_priv: Pointer to the device private structure + * @offset: Device register offset + * Return: The value read from @offset. + * + * This function may not be called when irqs are disabled, since + * its call to vmw_hw_unlock will re-enable bottom halves. 
+ */ +static inline uint32_t vmw_read(struct vmw_private *dev_priv, + unsigned int offset) +{ + u32 val; + +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(irqs_disabled()); +#endif + vmw_hw_lock(dev_priv, true); + val = __vmw_read(dev_priv, offset); + vmw_hw_unlock(dev_priv, true); return val; } @@ -722,8 +814,8 @@ extern void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes); extern void vmw_fifo_commit_flush(struct vmw_private *dev_priv, uint32_t bytes); extern int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *seqno); -extern void vmw_fifo_ping_host_locked(struct vmw_private *, uint32_t reason); -extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason); +extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason, + bool lock_bh); extern bool vmw_fifo_have_3d(struct vmw_private *dev_priv); extern bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv); extern int vmw_fifo_emit_dummy_query(struct vmw_private *dev_priv, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index f40c36e..ac66aad 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -149,7 +149,7 @@ static bool vmw_fence_enable_signaling(struct fence *f) if (seqno - fence->base.seqno < VMW_FENCE_WRAP) return false; - vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, false); return true; } @@ -183,7 +183,7 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout) if (likely(vmw_fence_obj_signaled(fence))) return timeout; - vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true); vmw_seqno_waiter_add(dev_priv); spin_lock_bh(f->lock); @@ -525,7 +525,7 @@ void vmw_fence_obj_flush(struct vmw_fence_obj *fence) { struct vmw_private *dev_priv = fman_from_fence(fence)->dev_priv; - vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); + 
vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true); } static void vmw_fence_destroy(struct vmw_fence_obj *fence) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c index a8baf5f..a70cc88 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c @@ -49,10 +49,10 @@ bool vmw_fifo_have_3d(struct vmw_private *dev_priv) if (!dev_priv->has_mob) return false; - spin_lock(&dev_priv->cap_lock); - vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_3D); - result = vmw_read(dev_priv, SVGA_REG_DEV_CAP); - spin_unlock(&dev_priv->cap_lock); + vmw_hw_lock(dev_priv, true); + __vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_3D); + result = __vmw_read(dev_priv, SVGA_REG_DEV_CAP); + vmw_hw_unlock(dev_priv, true); return (result != 0); } @@ -163,14 +163,32 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo) return 0; } -void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason) +/** + * vmw_fifo_ping_host - Notify the device that there are device commands + * to process + * + * @dev_priv: Pointer to a device private struct. + * @reason: A valid reason to ping the host. See device headers. + * @lock_bh: Always set to true unless irqs are disabled in which case + * it must be set to false. + * + * Aka the device "doorbell". The device has an optimization that allows + * avoiding calling the relatively expensive register write to + * SVGA_REG_SYNC if the SVGA_FIFO_BUSY mmio register is set to 1, meaning + * that the doorbell has already been called. The SVGA_FIFO_BUSY mmio register + * will be cleared by the device when a new write to SVGA_REG_SYNC is needed + * to notify the device of pending commands. 
+ */ +void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason, + bool lock_bh) { u32 *fifo_mem = dev_priv->mmio_virt; - preempt_disable(); - if (cmpxchg(fifo_mem + SVGA_FIFO_BUSY, 0, 1) == 0) - vmw_write(dev_priv, SVGA_REG_SYNC, reason); - preempt_enable(); + if (cmpxchg(fifo_mem + SVGA_FIFO_BUSY, 0, 1) == 0) { + vmw_hw_lock(dev_priv, lock_bh); + __vmw_write(dev_priv, SVGA_REG_SYNC, reason); + vmw_hw_unlock(dev_priv, lock_bh); + } } void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo) @@ -256,7 +274,7 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv, if (likely(!vmw_fifo_is_full(dev_priv, bytes))) return 0; - vmw_fifo_ping_host(dev_priv, SVGA_SYNC_FIFOFULL); + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_FIFOFULL, true); if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK)) return vmw_fifo_wait_noirq(dev_priv, bytes, interruptible, timeout); @@ -490,7 +508,7 @@ static void vmw_local_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes) vmw_mmio_write(0, fifo_mem + SVGA_FIFO_RESERVED); mb(); up_write(&fifo_state->rwsem); - vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true); mutex_unlock(&fifo_state->fifo_mutex); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c index b8c6a03..8ce0886 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c @@ -159,14 +159,14 @@ static int vmw_fill_compat_cap(struct vmw_private *dev_priv, void *bounce, (pair_offset + max_size * sizeof(SVGA3dCapPair)) / sizeof(u32); compat_cap->header.type = SVGA3DCAPS_RECORD_DEVCAPS; - spin_lock(&dev_priv->cap_lock); + vmw_hw_lock(dev_priv, true); for (i = 0; i < max_size; ++i) { - vmw_write(dev_priv, SVGA_REG_DEV_CAP, i); + __vmw_write(dev_priv, SVGA_REG_DEV_CAP, i); compat_cap->pairs[i][0] = i; compat_cap->pairs[i][1] = vmw_mask_multisample - (i, vmw_read(dev_priv, SVGA_REG_DEV_CAP)); + (i, __vmw_read(dev_priv, 
SVGA_REG_DEV_CAP)); } - spin_unlock(&dev_priv->cap_lock); + vmw_hw_unlock(dev_priv, true); return 0; } @@ -216,13 +216,13 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, if (num > SVGA3D_DEVCAP_MAX) num = SVGA3D_DEVCAP_MAX; - spin_lock(&dev_priv->cap_lock); + vmw_hw_lock(dev_priv, true); for (i = 0; i < num; ++i) { - vmw_write(dev_priv, SVGA_REG_DEV_CAP, i); + __vmw_write(dev_priv, SVGA_REG_DEV_CAP, i); *bounce32++ = vmw_mask_multisample - (i, vmw_read(dev_priv, SVGA_REG_DEV_CAP)); + (i, __vmw_read(dev_priv, SVGA_REG_DEV_CAP)); } - spin_unlock(&dev_priv->cap_lock); + vmw_hw_unlock(dev_priv, true); } else if (gb_objects) { ret = vmw_fill_compat_cap(dev_priv, bounce, size); if (unlikely(ret != 0)) @@ -420,7 +420,7 @@ unsigned int vmw_fops_poll(struct file *filp, struct poll_table_struct *wait) struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev); - vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true); return drm_poll(filp, wait); } @@ -443,6 +443,6 @@ ssize_t vmw_fops_read(struct file *filp, char __user *buffer, struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev); - vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true); return drm_read(filp, buffer, count, offset); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c index 12aaed7..af6970b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c @@ -246,7 +246,7 @@ int vmw_wait_seqno(struct vmw_private *dev_priv, if (likely(vmw_seqno_passed(dev_priv, seqno))) return 0; - vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true); if (!(fifo->capabilities & SVGA_FIFO_CAP_FENCE)) return vmw_fallback_wait(dev_priv, lazy, true, seqno, -- 2.4.3 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx 
http://lists.freedesktop.org/mailman/listinfo/dri-devel