[PATCH 10/17] drm/amdkfd: Simplify event ID and signal slot management

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Signal slots are identical to event IDs.

Replace the used_slot_bitmap and events hash table with an IDR to
allocate and lookup event IDs and signal slots more efficiently.

Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
Acked-by: Oded Gabbay <oded.gabbay at gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_events.c | 230 ++++++++++----------------------
 drivers/gpu/drm/amd/amdkfd/kfd_events.h |  14 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |   6 +-
 3 files changed, 80 insertions(+), 170 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 7cc1710..41580e0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -41,24 +41,16 @@ struct kfd_event_waiter {
 	bool activated;		 /* Becomes true when event is signaled */
 };
 
-#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
-#define SLOT_BITMAP_LONGS BITS_TO_LONGS(SLOTS_PER_PAGE)
-
 /*
- * Over-complicated pooled allocator for event notification slots.
- *
  * Each signal event needs a 64-bit signal slot where the signaler will write
- * a 1 before sending an interrupt.l (This is needed because some interrupts
+ * a 1 before sending an interrupt. (This is needed because some interrupts
  * do not contain enough spare data bits to identify an event.)
- * We get whole pages from vmalloc and map them to the process VA.
- * Individual signal events are then allocated a slot in a page.
+ * We get whole pages and map them to the process VA.
+ * Individual signal events use their event_id as slot index.
  */
-
 struct kfd_signal_page {
 	uint64_t *kernel_address;
 	uint64_t __user *user_address;
-	unsigned int free_slots;
-	unsigned long used_slot_bitmap[SLOT_BITMAP_LONGS];
 };
 
 /*
@@ -73,34 +65,6 @@ static uint64_t *page_slots(struct kfd_signal_page *page)
 	return page->kernel_address;
 }
 
-static bool allocate_free_slot(struct kfd_process *process,
-			       unsigned int *out_slot_index)
-{
-	struct kfd_signal_page *page = process->signal_page;
-	unsigned int slot;
-
-	if (!page || page->free_slots == 0) {
-		pr_debug("No free event signal slots were found for process %p\n",
-			 process);
-
-		return false;
-	}
-
-	slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE);
-
-	__set_bit(slot, page->used_slot_bitmap);
-	page->free_slots--;
-
-	page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT;
-
-	*out_slot_index = slot;
-
-	pr_debug("Allocated event signal slot in page %p, slot %d\n",
-		 page, slot);
-
-	return true;
-}
-
 static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
 {
 	void *backing_store;
@@ -110,8 +74,6 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
 	if (!page)
 		return NULL;
 
-	page->free_slots = SLOTS_PER_PAGE;
-
 	backing_store = (void *) __get_free_pages(GFP_KERNEL,
 					get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
 	if (!backing_store)
@@ -132,28 +94,26 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
 	return NULL;
 }
 
-static bool allocate_event_notification_slot(struct kfd_process *p,
-					     unsigned int *signal_slot_index)
+static int allocate_event_notification_slot(struct kfd_process *p,
+					    struct kfd_event *ev)
 {
+	int id;
+
 	if (!p->signal_page) {
 		p->signal_page = allocate_signal_page(p);
 		if (!p->signal_page)
-			return false;
+			return -ENOMEM;
 	}
 
-	return allocate_free_slot(p, signal_slot_index);
-}
+	id = idr_alloc(&p->event_idr, ev, 0, KFD_SIGNAL_EVENT_LIMIT,
+		       GFP_KERNEL);
+	if (id < 0)
+		return id;
 
-/* Assumes that the process's event_mutex is locked. */
-static void release_event_notification_slot(struct kfd_signal_page *page,
-						size_t slot_index)
-{
-	__clear_bit(slot_index, page->used_slot_bitmap);
-	page->free_slots++;
+	ev->event_id = id;
+	page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT;
 
-	/* We don't free signal pages, they are retained by the process
-	 * and reused until it exits.
-	 */
+	return 0;
 }
 
 /*
@@ -162,89 +122,32 @@ static void release_event_notification_slot(struct kfd_signal_page *page,
  */
 static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
 {
-	struct kfd_event *ev;
-
-	hash_for_each_possible(p->events, ev, events, id)
-		if (ev->event_id == id)
-			return ev;
-
-	return NULL;
-}
-
-/*
- * Produce a kfd event id for a nonsignal event.
- * These are arbitrary numbers, so we do a sequential search through
- * the hash table for an unused number.
- */
-static u32 make_nonsignal_event_id(struct kfd_process *p)
-{
-	u32 id;
-
-	for (id = p->next_nonsignal_event_id;
-		id < KFD_LAST_NONSIGNAL_EVENT_ID &&
-		lookup_event_by_id(p, id);
-		id++)
-		;
-
-	if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
-
-		/*
-		 * What if id == LAST_NONSIGNAL_EVENT_ID - 1?
-		 * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so
-		 * the first loop fails immediately and we proceed with the
-		 * wraparound loop below.
-		 */
-		p->next_nonsignal_event_id = id + 1;
-
-		return id;
-	}
-
-	for (id = KFD_FIRST_NONSIGNAL_EVENT_ID;
-		id < KFD_LAST_NONSIGNAL_EVENT_ID &&
-		lookup_event_by_id(p, id);
-		id++)
-		;
-
-
-	if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
-		p->next_nonsignal_event_id = id + 1;
-		return id;
-	}
-
-	p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
-	return 0;
-}
-
-static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p,
-						unsigned int signal_slot)
-{
-	return lookup_event_by_id(p, signal_slot);
+	return idr_find(&p->event_idr, id);
 }
 
 static int create_signal_event(struct file *devkfd,
 				struct kfd_process *p,
 				struct kfd_event *ev)
 {
+	int ret;
+
 	if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) {
 		if (!p->signal_event_limit_reached) {
 			pr_warn("Signal event wasn't created because limit was reached\n");
 			p->signal_event_limit_reached = true;
 		}
-		return -ENOMEM;
+		return -ENOSPC;
 	}
 
-	if (!allocate_event_notification_slot(p, &ev->signal_slot_index)) {
+	ret = allocate_event_notification_slot(p, ev);
+	if (ret) {
 		pr_warn("Signal event wasn't created because out of kernel memory\n");
-		return -ENOMEM;
+		return ret;
 	}
 
 	p->signal_event_count++;
 
-	ev->user_signal_address =
-			&p->signal_page->user_address[ev->signal_slot_index];
-
-	ev->event_id = ev->signal_slot_index;
-
+	ev->user_signal_address = &p->signal_page->user_address[ev->event_id];
 	pr_debug("Signal event number %zu created with id %d, address %p\n",
 			p->signal_event_count, ev->event_id,
 			ev->user_signal_address);
@@ -252,16 +155,20 @@ static int create_signal_event(struct file *devkfd,
 	return 0;
 }
 
-/*
- * No non-signal events are supported yet.
- * We create them as events that never signal.
- * Set event calls from user-mode are failed.
- */
 static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
 {
-	ev->event_id = make_nonsignal_event_id(p);
-	if (ev->event_id == 0)
-		return -ENOMEM;
+	/* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
+	 * intentional integer overflow to -1 without a compiler
+	 * warning. idr_alloc treats a negative value as "maximum
+	 * signed integer".
+	 */
+	int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
+			   (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
+			   GFP_KERNEL);
+
+	if (id < 0)
+		return id;
+	ev->event_id = id;
 
 	return 0;
 }
@@ -269,9 +176,8 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
 void kfd_event_init_process(struct kfd_process *p)
 {
 	mutex_init(&p->event_mutex);
-	hash_init(p->events);
+	idr_init(&p->event_idr);
 	p->signal_page = NULL;
-	p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
 	p->signal_event_count = 0;
 }
 
@@ -284,25 +190,22 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
 		waiter->event = NULL;
 	wake_up_all(&ev->wq);
 
-	if ((ev->type == KFD_EVENT_TYPE_SIGNAL ||
-	     ev->type == KFD_EVENT_TYPE_DEBUG) && p->signal_page) {
-		release_event_notification_slot(p->signal_page,
-						ev->signal_slot_index);
+	if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
+	    ev->type == KFD_EVENT_TYPE_DEBUG)
 		p->signal_event_count--;
-	}
 
-	hash_del(&ev->events);
+	idr_remove(&p->event_idr, ev->event_id);
 	kfree(ev);
 }
 
 static void destroy_events(struct kfd_process *p)
 {
 	struct kfd_event *ev;
-	struct hlist_node *tmp;
-	unsigned int hash_bkt;
+	uint32_t id;
 
-	hash_for_each_safe(p->events, hash_bkt, tmp, ev, events)
+	idr_for_each_entry(&p->event_idr, ev, id)
 		destroy_event(p, ev);
+	idr_destroy(&p->event_idr);
 }
 
 /*
@@ -365,7 +268,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
 		if (!ret) {
 			*event_page_offset = KFD_MMAP_EVENTS_MASK;
 			*event_page_offset <<= PAGE_SHIFT;
-			*event_slot_index = ev->signal_slot_index;
+			*event_slot_index = ev->event_id;
 		}
 		break;
 	default:
@@ -374,8 +277,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
 	}
 
 	if (!ret) {
-		hash_add(p->events, &ev->events, ev->event_id);
-
 		*event_id = ev->event_id;
 		*event_trigger_data = ev->event_id;
 	} else {
@@ -469,17 +370,7 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
 
 static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
 {
-	page_slots(p->signal_page)[ev->signal_slot_index] =
-						UNSIGNALED_EVENT_SLOT;
-}
-
-static bool is_slot_signaled(struct kfd_process *p, unsigned int index)
-{
-	if (!p->signal_page)
-		return false;
-	else
-		return page_slots(p->signal_page)[index] !=
-			UNSIGNALED_EVENT_SLOT;
+	page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT;
 }
 
 static void set_event_from_interrupt(struct kfd_process *p,
@@ -518,13 +409,31 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
 		 * ignore it, but we could use any bits we did receive to
 		 * search faster.
 		 */
-		unsigned int i;
+		uint64_t *slots = page_slots(p->signal_page);
+		uint32_t id;
+
+		if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) {
+			/* With relatively few events, it's faster to
+			 * iterate over the event IDR
+			 */
+			idr_for_each_entry(&p->event_idr, ev, id) {
+				if (id >= KFD_SIGNAL_EVENT_LIMIT)
+					break;
 
-		for (i = 0; i < SLOTS_PER_PAGE; i++)
-			if (is_slot_signaled(p, i)) {
-				ev = lookup_event_by_page_slot(p, i);
-				set_event_from_interrupt(p, ev);
+				if (slots[id] != UNSIGNALED_EVENT_SLOT)
+					set_event_from_interrupt(p, ev);
 			}
+		} else {
+			/* With relatively many events, it's faster to
+			 * iterate over the signal slots and lookup
+			 * only signaled events from the IDR.
+			 */
+			for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
+				if (slots[id] != UNSIGNALED_EVENT_SLOT) {
+					ev = lookup_event_by_id(p, id);
+					set_event_from_interrupt(p, ev);
+				}
+		}
 	}
 
 	mutex_unlock(&p->event_mutex);
@@ -836,12 +745,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
 {
 	struct kfd_hsa_memory_exception_data *ev_data;
 	struct kfd_event *ev;
-	int bkt;
+	uint32_t id;
 	bool send_signal = true;
 
 	ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
 
-	hash_for_each(p->events, bkt, ev, events)
+	id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+	idr_for_each_entry_continue(&p->event_idr, ev, id)
 		if (ev->type == type) {
 			send_signal = false;
 			dev_dbg(kfd_device,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
index f85fcee..abca5bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
@@ -31,9 +31,13 @@
 #include "kfd_priv.h"
 #include <uapi/linux/kfd_ioctl.h>
 
-#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U
-#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK
-#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX
+/*
+ * IDR supports non-negative integer IDs. Small IDs are used for
+ * signal events to match their signal slot. Use the upper half of the
+ * ID space for non-signal events.
+ */
+#define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1)
+#define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX
 
 /*
  * Written into kfd_signal_slot_t to indicate that the event is not signaled.
@@ -47,9 +51,6 @@ struct kfd_event_waiter;
 struct signal_page;
 
 struct kfd_event {
-	/* All events in process, rooted at kfd_process.events. */
-	struct hlist_node events;
-
 	u32 event_id;
 
 	bool signaled;
@@ -60,7 +61,6 @@ struct kfd_event {
 	wait_queue_head_t wq; /* List of event waiters. */
 
 	/* Only for signal events. */
-	unsigned int signal_slot_index;
 	uint64_t __user *user_signal_address;
 
 	/* type specific data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index c1b3ee2..ebae8e1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -31,6 +31,7 @@
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
 #include <linux/kfd_ioctl.h>
+#include <linux/idr.h>
 #include <kgd_kfd_interface.h>
 
 #include "amd_shared.h"
@@ -538,11 +539,10 @@ struct kfd_process {
 
 	/* Event-related data */
 	struct mutex event_mutex;
-	/* All events in process hashed by ID, linked on kfd_event.events. */
-	DECLARE_HASHTABLE(events, 4);
+	/* Event ID allocator and lookup */
+	struct idr event_idr;
 	/* Event page */
 	struct kfd_signal_page *signal_page;
-	u32 next_nonsignal_event_id;
 	size_t signal_event_count;
 	bool signal_event_limit_reached;
 };
-- 
2.7.4



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux