Re: [PATCH 1/3] drm/i915/guc: Add non blocking CTB send function

On Thu, 21 Nov 2019 00:56:02 +0100, <John.C.Harrison@xxxxxxxxx> wrote:

From: Matthew Brost <matthew.brost@xxxxxxxxx>

Add non blocking CTB send fuction, intel_guc_send_nb. In order to
support a non blocking CTB send fuction a spin lock is needed to

2x typos: s/fuction/function/

protect the CTB descriptors fields. Also the non blocking call must not
update the fence value as this value is owned by the blocking call
(intel_guc_send).

you probably mean "intel_guc_send_ct", as intel_guc_send is just a wrapper
around guc->send


The blocking CTB now must have a flow control mechanism to ensure the
buffer isn't overrun. A lazy spin wait is used as we believe the flow
control condition should be rare with properly sized buffer. A retry
counter is also implemented which fails H2G CTBs once a limit is
reached to prevent deadlock.

The function, intel_guc_send_nb, is exported in this patch but unused.
Several patches later in the series make use of this function.

They're likely in yet another series, not this one.


Cc: John Harrison <john.c.harrison@xxxxxxxxx>
Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx>
---
 drivers/gpu/drm/i915/gt/uc/intel_guc.h    |  2 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 97 +++++++++++++++++++----
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 10 ++-
 3 files changed, 91 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index e6400204a2bd..77c5af919ace 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -94,6 +94,8 @@ intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len,
 	return guc->send(guc, action, len, response_buf, response_buf_size);
 }
+int intel_guc_send_nb(struct intel_guc_ct *ct, const u32 *action, u32 len);
+

Hmm, this guc/ct parameter mismatch breaks our layering.
But we can keep this layering intact by introducing some flags to
the existing guc_send() function. These flags could be passed as
high bits in action[0], like this:

#define GUC_ACTION_FLAG_DONT_WAIT 0x80000000

int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset)
{
	u32 action[] = {
		INTEL_GUC_ACTION_AUTHENTICATE_HUC | GUC_ACTION_FLAG_DONT_WAIT,
		rsa_offset
	};

	return intel_guc_send(guc, action, ARRAY_SIZE(action));
}

then the actual back-end of guc->send can take the proper steps based on this flag:

@@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len,
        GEM_BUG_ON(!len);
        GEM_BUG_ON(len > guc->send_regs.count);

+       /* the MMIO path has no non-blocking mode */
+       if (*action & GUC_ACTION_FLAG_DONT_WAIT)
+               return -EINVAL;
+
        /* We expect only action code */
        GEM_BUG_ON(*action & ~INTEL_GUC_MSG_CODE_MASK);

@@ @@ int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len,
        u32 status = ~0; /* undefined */
        int ret;

+       if (*action & GUC_ACTION_FLAG_DONT_WAIT) {
+               GEM_BUG_ON(response_buf);
+               GEM_BUG_ON(response_buf_size);
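+               /* note: DONT_WAIT flag probably still needs to be stripped from action[0] */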
+               return ctch_send_nb(ct, ctch, action, len);
+       }
+
        mutex_lock(&guc->send_mutex);

ret = ctch_send(ct, ctch, action, len, response_buf, response_buf_size,


 static inline void intel_guc_notify(struct intel_guc *guc)
 {
 	guc->notify(guc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index b49115517510..e50d968b15d5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -3,6 +3,8 @@
  * Copyright © 2016-2019 Intel Corporation
  */
+#include <linux/circ_buf.h>
+
 #include "i915_drv.h"
 #include "intel_guc_ct.h"
@@ -12,6 +14,8 @@
 #define CT_DEBUG_DRIVER(...)	do { } while (0)
 #endif
+#define MAX_RETRY		0x1000000
+
 struct ct_request {
 	struct list_head link;
 	u32 fence;
@@ -40,7 +44,8 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct)
 	/* we're using static channel owners */
 	ct->host_channel.owner = CTB_OWNER_HOST;
-	spin_lock_init(&ct->lock);
+	spin_lock_init(&ct->request_lock);
+	spin_lock_init(&ct->send_lock);
 	INIT_LIST_HEAD(&ct->pending_requests);
 	INIT_LIST_HEAD(&ct->incoming_requests);
 	INIT_WORK(&ct->worker, ct_incoming_request_worker_func);
@@ -291,7 +296,8 @@ static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch)
 static int ctb_write(struct intel_guc_ct_buffer *ctb,
 		     const u32 *action,
 		     u32 len /* in dwords */,
-		     u32 fence,
+		     u32 fence_value,
+		     bool enable_fence,

maybe we can just guarantee that fence=0 will never be used as a valid
fence id; then this flag could be replaced with a (fence != 0) check.
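
something like this in ctb_write(), keeping the plain "fence" parameter
(just a sketch):

	header = (len << GUC_CT_MSG_LEN_SHIFT) |
		 (fence ? GUC_CT_MSG_WRITE_FENCE_TO_DESC : 0) |
		 (want_response ? GUC_CT_MSG_SEND_STATUS : 0) |
		 (action[0] << GUC_CT_MSG_ACTION_SHIFT);

with ctch_get_next_fence() tweaked to guarantee it never returns 0.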

 		     bool want_response)
 {
 	struct guc_ct_buffer_desc *desc = ctb->desc;
@@ -328,18 +334,18 @@ static int ctb_write(struct intel_guc_ct_buffer *ctb,
 	 * DW2+: action data
 	 */
 	header = (len << GUC_CT_MSG_LEN_SHIFT) |
-		 (GUC_CT_MSG_WRITE_FENCE_TO_DESC) |
+		 (enable_fence ? GUC_CT_MSG_WRITE_FENCE_TO_DESC : 0) |

Hmm, even if we ask the fw not to write the fence back to the descriptor,
IIRC the current firmware will unconditionally write back the return
status of this non-blocking call, possibly overwriting the status of the
blocked call.

 		 (want_response ? GUC_CT_MSG_SEND_STATUS : 0) |

btw, if we switch all requests to expect a reply sent back over the CTB,
then we can possibly drop the send_mutex in the CTB paths, and block
only when there is no DONT_WAIT flag and we have to wait for a response.

 		 (action[0] << GUC_CT_MSG_ACTION_SHIFT);
	CT_DEBUG_DRIVER("CT: writing %*ph %*ph %*ph\n",
-			4, &header, 4, &fence,
+			4, &header, 4, &fence_value,
 			4 * (len - 1), &action[1]);
	cmds[tail] = header;
 	tail = (tail + 1) % size;
-	cmds[tail] = fence;
+	cmds[tail] = fence_value;
 	tail = (tail + 1) % size;
	for (i = 1; i < len; i++) {
@@ -440,6 +446,47 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
 	return err;
 }
+static inline bool ctb_has_room(struct guc_ct_buffer_desc *desc, u32 len)
+{
+	u32 head = READ_ONCE(desc->head);
+	u32 space;
+
+	space = CIRC_SPACE(desc->tail, head, desc->size);
+
+	return space >= len;
+}
+
+int intel_guc_send_nb(struct intel_guc_ct *ct,
+		      const u32 *action,
+		      u32 len)
+{
+	struct intel_guc_ct_channel *ctch = &ct->host_channel;
+	struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND];
+	struct guc_ct_buffer_desc *desc = ctb->desc;
+	int err;
+
+	GEM_BUG_ON(!ctch->enabled);
+	GEM_BUG_ON(!len);
+	GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
+	lockdep_assert_held(&ct->send_lock);

hmm, does this mean that it's now the caller's responsibility to take
the CT's private spinlock? That is not how the other guc_send() functions work.
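
if the function is meant to be self-contained, it could take the lock
itself, roughly like this (just a sketch reusing the helpers from this
patch, untested):

	int intel_guc_send_nb(struct intel_guc_ct *ct, const u32 *action, u32 len)
	{
		struct intel_guc_ct_channel *ctch = &ct->host_channel;
		struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND];
		struct guc_ct_buffer_desc *desc = ctb->desc;
		unsigned long flags;
		int err = 0;

		GEM_BUG_ON(!ctch->enabled);
		GEM_BUG_ON(!len);
		GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);

		spin_lock_irqsave(&ct->send_lock, flags);

		if (unlikely(!ctb_has_room(desc, (len + 1) * 4))) {
			/* bump the retry counter under the lock */
			err = ++ct->retry >= MAX_RETRY ? -EDEADLK : -EBUSY;
		} else {
			ct->retry = 0;
			err = ctb_write(ctb, action, len, 0, false, false);
			if (likely(!err))
				intel_guc_notify(ct_to_guc(ct));
		}

		spin_unlock_irqrestore(&ct->send_lock, flags);
		return err;
	}

then callers would not need to know about CT internals at all.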

+
+	if (unlikely(!ctb_has_room(desc, (len + 1) * 4))) {
+		ct->retry++;
+		if (ct->retry >= MAX_RETRY)
+			return -EDEADLK;
+		else
+			return -EBUSY;
+	}
+
+	ct->retry = 0;
+	err = ctb_write(ctb, action, len, 0, false, false);
+	if (unlikely(err))
+		return err;
+
+	intel_guc_notify(ct_to_guc(ct));
+	return 0;
+}
+
 static int ctch_send(struct intel_guc_ct *ct,
 		     struct intel_guc_ct_channel *ctch,
 		     const u32 *action,
@@ -460,17 +507,35 @@ static int ctch_send(struct intel_guc_ct *ct,
 	GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
 	GEM_BUG_ON(!response_buf && response_buf_size);
+	/*
+	 * We use a lazy spin wait loop here as we believe that if the CT
+	 * buffers are sized correctly the flow control condition should be
+	 * rare.
+	 */
+retry:
+	spin_lock_irqsave(&ct->send_lock, flags);
+	if (unlikely(!ctb_has_room(desc, (len + 1) * 4))) {
+		spin_unlock_irqrestore(&ct->send_lock, flags);
+		ct->retry++;
+		if (ct->retry >= MAX_RETRY)
+			return -EDEADLK;

I'm not sure which is better: a secret deadlock that is hard to reproduce,
or deadlocks that are easier to catch and so help us get deadlock-clean.

+		cpu_relax();
+		goto retry;
+	}
+
+	ct->retry = 0;
 	fence = ctch_get_next_fence(ctch);
 	request.fence = fence;
 	request.status = 0;
 	request.response_len = response_buf_size;
 	request.response_buf = response_buf;
-	spin_lock_irqsave(&ct->lock, flags);
+	spin_lock(&ct->request_lock);
 	list_add_tail(&request.link, &ct->pending_requests);
-	spin_unlock_irqrestore(&ct->lock, flags);
+	spin_unlock(&ct->request_lock);
-	err = ctb_write(ctb, action, len, fence, !!response_buf);
+	err = ctb_write(ctb, action, len, fence, true, !!response_buf);
+	spin_unlock_irqrestore(&ct->send_lock, flags);
 	if (unlikely(err))
 		goto unlink;
@@ -501,9 +566,9 @@ static int ctch_send(struct intel_guc_ct *ct,
 	}
unlink:
-	spin_lock_irqsave(&ct->lock, flags);
+	spin_lock_irqsave(&ct->request_lock, flags);
 	list_del(&request.link);
-	spin_unlock_irqrestore(&ct->lock, flags);
+	spin_unlock_irqrestore(&ct->request_lock, flags);
	return err;
 }
@@ -653,7 +718,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
	CT_DEBUG_DRIVER("CT: response fence %u status %#x\n", fence, status);
-	spin_lock(&ct->lock);
+	spin_lock(&ct->request_lock);
 	list_for_each_entry(req, &ct->pending_requests, link) {
 		if (unlikely(fence != req->fence)) {
 			CT_DEBUG_DRIVER("CT: request %u awaits response\n",
@@ -672,7 +737,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
 		found = true;
 		break;
 	}
-	spin_unlock(&ct->lock);
+	spin_unlock(&ct->request_lock);
	if (!found)
 		DRM_ERROR("CT: unsolicited response %*ph\n", 4 * msglen, msg);
@@ -710,13 +775,13 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct)
 	u32 *payload;
 	bool done;
-	spin_lock_irqsave(&ct->lock, flags);
+	spin_lock_irqsave(&ct->request_lock, flags);
 	request = list_first_entry_or_null(&ct->incoming_requests,
 					   struct ct_incoming_request, link);
 	if (request)
 		list_del(&request->link);
 	done = !!list_empty(&ct->incoming_requests);
-	spin_unlock_irqrestore(&ct->lock, flags);
+	spin_unlock_irqrestore(&ct->request_lock, flags);
	if (!request)
 		return true;
@@ -777,9 +842,9 @@ static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg)
 	}
 	memcpy(request->msg, msg, 4 * msglen);
-	spin_lock_irqsave(&ct->lock, flags);
+	spin_lock_irqsave(&ct->request_lock, flags);
 	list_add_tail(&request->link, &ct->incoming_requests);
-	spin_unlock_irqrestore(&ct->lock, flags);
+	spin_unlock_irqrestore(&ct->request_lock, flags);
	queue_work(system_unbound_wq, &ct->worker);
 	return 0;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
index 7c24d83f5c24..bc670a796bd8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
@@ -62,8 +62,11 @@ struct intel_guc_ct {
 	struct intel_guc_ct_channel host_channel;
 	/* other channels are tbd */
-	/** @lock: protects pending requests list */
-	spinlock_t lock;
+	/** @request_lock: protects pending requests list */
+	spinlock_t request_lock;
+
+	/** @send_lock: protects h2g channel */
+	spinlock_t send_lock;
	/** @pending_requests: list of requests waiting for response */
 	struct list_head pending_requests;
@@ -73,6 +76,9 @@ struct intel_guc_ct {
	/** @worker: worker for handling incoming requests */
 	struct work_struct worker;
+
+	/** @retry: the number of times a H2G CTB has been retried */
+	u32 retry;
 };
void intel_guc_ct_init_early(struct intel_guc_ct *ct);
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx



