Quoting Michał Winiarski (2017-09-12 13:47:25) > @@ -416,15 +416,15 @@ static void guc_wq_item_append(struct i915_guc_client *client, > struct intel_engine_cs *engine = rq->engine; > struct guc_process_desc *desc = __get_process_desc(client); > struct guc_wq_item *wqi; > - u32 freespace, tail, wq_off; > + u32 freespace, ring_tail, wq_off, wq_next; > > /* Free space is guaranteed */ > - freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size); > + freespace = CIRC_SPACE(desc->tail, desc->head, GUC_WQ_SIZE); > GEM_BUG_ON(freespace < wqi_size); Fwiw, I would move this to the cmpxchg loop. GEM_BUG_ON(CIRC_SPACE(wq_off, READ_ONCE(desc->head), GUC_WQ_SIZE) < wqi_size); > > /* The GuC firmware wants the tail index in QWords, not bytes */ > - tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3; > - GEM_BUG_ON(tail > WQ_RING_TAIL_MAX); > + ring_tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3; > + GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX); > > /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we > * should not have the case where structure wqi is across page, neither > @@ -435,11 +435,12 @@ static void guc_wq_item_append(struct i915_guc_client *client, > */ > BUILD_BUG_ON(wqi_size != 16); > > - /* postincrement WQ tail for next time */ > - wq_off = client->wq_tail; > + /* Find our offset and postincrement WQ tail for next time */ > + do { > + wq_off = desc->tail; wq_off = READ_ONCE(desc->tail); > + wq_next = (wq_off + wqi_size) & (GUC_WQ_SIZE - 1); > + } while (cmpxchg(&desc->tail, wq_off, wq_next) != wq_off); > GEM_BUG_ON(wq_off & (wqi_size - 1)); > - client->wq_tail += wqi_size; > - client->wq_tail &= client->wq_size - 1; > > /* WQ starts from the page after doorbell / process_desc */ > wqi = client->vaddr + wq_off + GUC_DB_SIZE; > @@ -453,7 +454,7 @@ static void guc_wq_item_append(struct i915_guc_client *client, > /* The GuC wants only the low-order word of the context descriptor */ > wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine); > > - wqi->submit_element_info = tail << WQ_RING_TAIL_SHIFT; > + wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT; > wqi->fence_id = rq->global_seqno; > } > > @@ -463,20 +464,14 @@ static void guc_reset_wq(struct i915_guc_client *client) > > desc->head = 0; > desc->tail = 0; > - > - client->wq_tail = 0; > } > > static int guc_ring_doorbell(struct i915_guc_client *client) > { > - struct guc_process_desc *desc = __get_process_desc(client); > union guc_doorbell_qw db_cmp, db_exc, db_ret; > union guc_doorbell_qw *db; > int attempt = 2, ret = -EAGAIN; > > - /* Update the tail so it is visible to GuC */ > - desc->tail = client->wq_tail; > - > /* current cookie */ > db_cmp.db_status = GUC_DOORBELL_ENABLED; > db_cmp.cookie = client->doorbell_cookie; > @@ -535,7 +530,6 @@ static void i915_guc_submit(struct intel_engine_cs *engine) > struct execlist_port *port = engine->execlist_port; > unsigned int engine_id = engine->id; > unsigned int n; > - unsigned long flags; > > for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) { > struct drm_i915_gem_request *rq; > @@ -548,14 +542,10 @@ static void i915_guc_submit(struct intel_engine_cs *engine) > if (i915_vma_is_map_and_fenceable(rq->ring->vma)) > POSTING_READ_FW(GUC_STATUS); > > - spin_lock_irqsave(&client->wq_lock, flags); > - > guc_wq_item_append(client, rq); > WARN_ON(guc_ring_doorbell(client)); > > client->submissions[engine_id] += 1; Per-engine, so this is actually serialized by the tasklet. Hmm, double accounting after reset. But do I care? I consider it to be pointless since we are counting at the wrong boundary. I think we need the READ_ONCE to be clear to both the compiler and ourselves that we are reading transient values shared with the guc. But since that's the only issue I could see, Reviewed-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx