From: Alex Dai <yu.dai@xxxxxxxxx>
For now, remove the spinlocks that protected the GuC's
statistics block and work queue; they are only accessed
by code that already holds the global struct_mutex, and
so are redundant (until the big struct_mutex rewrite!).
The specific problem that the spinlocks caused was that
if the work queue was full, the driver would try to
spinwait for one jiffy, but with interrupts disabled the
jiffy count would not advance, leading to a system hang.
The issue was found using test case igt/gem_close_race.
The new version will usleep() instead, still holding
the struct_mutex but without any spinlocks.
v4: Reorganize commit message (Dave Gordon)
v3: Remove unnecessary whitespace churn
v2: Clean up wq_lock too
v1: Clean up host2guc lock as well
Signed-off-by: Alex Dai <yu.dai@xxxxxxxxx>
---
drivers/gpu/drm/i915/i915_debugfs.c | 12 ++++++------
drivers/gpu/drm/i915/i915_guc_submission.c | 31 ++++++------------------------
drivers/gpu/drm/i915/intel_guc.h | 4 ----
3 files changed, 12 insertions(+), 35 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a728ff1..d6b7817 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2473,15 +2473,15 @@ static int i915_guc_info(struct seq_file *m, void *data)
if (!HAS_GUC_SCHED(dev_priv->dev))
return 0;
+ if (mutex_lock_interruptible(&dev->struct_mutex))
+ return 0;
+
/* Take a local copy of the GuC data, so we can dump it at leisure */
- spin_lock(&dev_priv->guc.host2guc_lock);
guc = dev_priv->guc;
- if (guc.execbuf_client) {
- spin_lock(&guc.execbuf_client->wq_lock);
+ if (guc.execbuf_client)
client = *guc.execbuf_client;
- spin_unlock(&guc.execbuf_client->wq_lock);
- }
- spin_unlock(&dev_priv->guc.host2guc_lock);
+
+ mutex_unlock(&dev->struct_mutex);
seq_printf(m, "GuC total action count: %llu\n", guc.action_count);
seq_printf(m, "GuC action failure count: %u\n", guc.action_fail);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index ed9f100..a7f9785 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -86,7 +86,6 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len)
return -EINVAL;
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
- spin_lock(&dev_priv->guc.host2guc_lock);
dev_priv->guc.action_count += 1;
dev_priv->guc.action_cmd = data[0];
@@ -119,7 +118,6 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len)
}
dev_priv->guc.action_status = status;
- spin_unlock(&dev_priv->guc.host2guc_lock);
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
return ret;
@@ -292,16 +290,12 @@ static uint32_t select_doorbell_cacheline(struct intel_guc *guc)
const uint32_t cacheline_size = cache_line_size();
uint32_t offset;
- spin_lock(&guc->host2guc_lock);
-
/* Doorbell uses a single cache line within a page */
offset = offset_in_page(guc->db_cacheline);
/* Moving to next cache line to reduce contention */
guc->db_cacheline += cacheline_size;
- spin_unlock(&guc->host2guc_lock);
-
DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n",
offset, guc->db_cacheline, cacheline_size);
@@ -322,13 +316,11 @@ static uint16_t assign_doorbell(struct intel_guc *guc, uint32_t priority)
const uint16_t end = start + half;
uint16_t id;
- spin_lock(&guc->host2guc_lock);
id = find_next_zero_bit(guc->doorbell_bitmap, end, start);
if (id == end)
id = GUC_INVALID_DOORBELL_ID;
else
bitmap_set(guc->doorbell_bitmap, id, 1);
- spin_unlock(&guc->host2guc_lock);
DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n",
hi_pri ? "high" : "normal", id);
@@ -338,9 +330,7 @@ static uint16_t assign_doorbell(struct intel_guc *guc, uint32_t priority)
static void release_doorbell(struct intel_guc *guc, uint16_t id)
{
- spin_lock(&guc->host2guc_lock);
bitmap_clear(guc->doorbell_bitmap, id, 1);
- spin_unlock(&guc->host2guc_lock);
}
/*
@@ -487,16 +477,13 @@ static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
struct guc_process_desc *desc;
void *base;
u32 size = sizeof(struct guc_wq_item);
- int ret = 0, timeout_counter = 200;
+ int ret = -ETIMEDOUT, timeout_counter = 200;
base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
desc = base + gc->proc_desc_offset;
while (timeout_counter-- > 0) {
- ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail, desc->head,
- gc->wq_size) >= size, 1);
-
- if (!ret) {
+ if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
*offset = gc->wq_tail;
/* advance the tail for next workqueue item */
@@ -505,7 +492,11 @@ static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
/* this will break the loop */
timeout_counter = 0;
+ ret = 0;
}
+
+ if (timeout_counter)
+ usleep_range(1000, 2000);