[PATCH 2/6] drm/amdkfd: KFD interrupt access ih_fifo data in-place

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



To handle 40000 to 80000 interrupts per second running CPX mode with 4
streams/queues per KFD node, KFD interrupt handler becomes the
performance bottleneck.

Remove the kfifo_out memcpy overhead by accessing ih_fifo data in-place
and updating rptr with kfifo_skip_count.

Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
Reviewed-by: Felix Kuehling <felix.kuehling@xxxxxxx>
---
 drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 35 +++++++++-------------
 1 file changed, 14 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 9b6b6e882593..e7412de9a0ac 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -114,50 +114,43 @@ void kfd_interrupt_exit(struct kfd_node *node)
  */
 bool enqueue_ih_ring_entry(struct kfd_node *node, const void *ih_ring_entry)
 {
-	int count;
-
-	count = kfifo_in(&node->ih_fifo, ih_ring_entry,
-				node->kfd->device_info.ih_ring_entry_size);
-	if (count != node->kfd->device_info.ih_ring_entry_size) {
+	if (kfifo_is_full(&node->ih_fifo)) {
 		dev_dbg_ratelimited(node->adev->dev,
-			"Interrupt ring overflow, dropping interrupt %d\n",
-			count);
+				    "Interrupt ring overflow, dropping interrupt\n");
 		return false;
 	}
 
+	kfifo_in(&node->ih_fifo, ih_ring_entry, node->kfd->device_info.ih_ring_entry_size);
 	return true;
 }
 
 /*
  * Assumption: single reader/writer. This function is not re-entrant
  */
-static bool dequeue_ih_ring_entry(struct kfd_node *node, void *ih_ring_entry)
+static bool dequeue_ih_ring_entry(struct kfd_node *node, u32 **ih_ring_entry)
 {
 	int count;
 
-	count = kfifo_out(&node->ih_fifo, ih_ring_entry,
-				node->kfd->device_info.ih_ring_entry_size);
-
-	WARN_ON(count && count != node->kfd->device_info.ih_ring_entry_size);
+	if (kfifo_is_empty(&node->ih_fifo))
+		return false;
 
+	count = kfifo_out_linear_ptr(&node->ih_fifo, ih_ring_entry,
+				     node->kfd->device_info.ih_ring_entry_size);
+	WARN_ON(count != node->kfd->device_info.ih_ring_entry_size);
 	return count == node->kfd->device_info.ih_ring_entry_size;
 }
 
 static void interrupt_wq(struct work_struct *work)
 {
-	struct kfd_node *dev = container_of(work, struct kfd_node,
-						interrupt_work);
-	uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
+	struct kfd_node *dev = container_of(work, struct kfd_node, interrupt_work);
+	uint32_t *ih_ring_entry;
 	unsigned long start_jiffies = jiffies;
 
-	if (dev->kfd->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {
-		dev_err_once(dev->adev->dev, "Ring entry too small\n");
-		return;
-	}
-
-	while (dequeue_ih_ring_entry(dev, ih_ring_entry)) {
+	while (dequeue_ih_ring_entry(dev, &ih_ring_entry)) {
 		dev->kfd->device_info.event_interrupt_class->interrupt_wq(dev,
 								ih_ring_entry);
+		kfifo_skip_count(&dev->ih_fifo, dev->kfd->device_info.ih_ring_entry_size);
+
 		if (time_is_before_jiffies(start_jiffies + HZ)) {
 			/* If we spent more than a second processing signals,
 			 * reschedule the worker to avoid soft-lockup warnings
-- 
2.47.1




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux