Signal slots are identical to event IDs. Replace the used_slot_bitmap and events hash table with an IDR to allocate and lookup event IDs and signal slots more efficiently. Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> --- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 230 ++++++++++---------------------- drivers/gpu/drm/amd/amdkfd/kfd_events.h | 14 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 +- 3 files changed, 80 insertions(+), 170 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index f800e48..cddd4b9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -41,24 +41,16 @@ struct kfd_event_waiter { bool activated; /* Becomes true when event is signaled */ }; -#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT -#define SLOT_BITMAP_LONGS BITS_TO_LONGS(SLOTS_PER_PAGE) - /* - * Over-complicated pooled allocator for event notification slots. - * * Each signal event needs a 64-bit signal slot where the signaler will write - * a 1 before sending an interrupt.l (This is needed because some interrupts + * a 1 before sending an interrupt. (This is needed because some interrupts * do not contain enough spare data bits to identify an event.) - * We get whole pages from vmalloc and map them to the process VA. - * Individual signal events are then allocated a slot in a page. + * We get whole pages and map them to the process VA. + * Individual signal events use their event_id as slot index. */ - struct kfd_signal_page { uint64_t *kernel_address; uint64_t __user *user_address; - unsigned int free_slots; - unsigned long used_slot_bitmap[SLOT_BITMAP_LONGS]; }; /* @@ -73,34 +65,6 @@ static uint64_t *page_slots(struct kfd_signal_page *page) return page->kernel_address; } -static bool allocate_free_slot(struct kfd_process *process, - unsigned int *out_slot_index) -{ - struct kfd_signal_page *page = process->signal_page; - unsigned int slot; - - if (!page || page->free_slots == 0) { - pr_debug("No free event signal slots were found for process %p\n", - process); - - return false; - } - - slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE); - - __set_bit(slot, page->used_slot_bitmap); - page->free_slots--; - - page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT; - - *out_slot_index = slot; - - pr_debug("Allocated event signal slot in page %p, slot %d\n", - page, slot); - - return true; -} - static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) { void *backing_store; @@ -110,8 +74,6 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) if (!page) return NULL; - page->free_slots = SLOTS_PER_PAGE; - backing_store = (void *) __get_free_pages(GFP_KERNEL, get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); if (!backing_store) @@ -132,28 +94,26 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) return NULL; } -static bool allocate_event_notification_slot(struct kfd_process *p, - unsigned int *signal_slot_index) +static int allocate_event_notification_slot(struct kfd_process *p, + struct kfd_event *ev) { + int id; + if (!p->signal_page) { p->signal_page = allocate_signal_page(p); if (!p->signal_page) - return false; + return -ENOMEM; } - return allocate_free_slot(p, signal_slot_index); -} + id = idr_alloc(&p->event_idr, ev, 0, KFD_SIGNAL_EVENT_LIMIT, + GFP_KERNEL); + if (id < 0) + return id; -/* Assumes that the process's event_mutex is locked. */ -static void release_event_notification_slot(struct kfd_signal_page *page, - size_t slot_index) -{ - __clear_bit(slot_index, page->used_slot_bitmap); - page->free_slots++; + ev->event_id = id; + page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT; - /* We don't free signal pages, they are retained by the process - * and reused until it exits. - */ + return 0; } /* @@ -162,89 +122,32 @@ static void release_event_notification_slot(struct kfd_signal_page *page, */ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) { - struct kfd_event *ev; - - hash_for_each_possible(p->events, ev, events, id) - if (ev->event_id == id) - return ev; - - return NULL; -} - -/* - * Produce a kfd event id for a nonsignal event. - * These are arbitrary numbers, so we do a sequential search through - * the hash table for an unused number. - */ -static u32 make_nonsignal_event_id(struct kfd_process *p) -{ - u32 id; - - for (id = p->next_nonsignal_event_id; - id < KFD_LAST_NONSIGNAL_EVENT_ID && - lookup_event_by_id(p, id); - id++) - ; - - if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { - - /* - * What if id == LAST_NONSIGNAL_EVENT_ID - 1? - * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so - * the first loop fails immediately and we proceed with the - * wraparound loop below. - */ - p->next_nonsignal_event_id = id + 1; - - return id; - } - - for (id = KFD_FIRST_NONSIGNAL_EVENT_ID; - id < KFD_LAST_NONSIGNAL_EVENT_ID && - lookup_event_by_id(p, id); - id++) - ; - - - if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { - p->next_nonsignal_event_id = id + 1; - return id; - } - - p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; - return 0; -} - -static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p, - unsigned int signal_slot) -{ - return lookup_event_by_id(p, signal_slot); + return idr_find(&p->event_idr, id); } static int create_signal_event(struct file *devkfd, struct kfd_process *p, struct kfd_event *ev) { + int ret; + if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) { if (!p->signal_event_limit_reached) { pr_warn("Signal event wasn't created because limit was reached\n"); p->signal_event_limit_reached = true; } - return -ENOMEM; + return -ENOSPC; } - if (!allocate_event_notification_slot(p, &ev->signal_slot_index)) { + ret = allocate_event_notification_slot(p, ev); + if (ret) { pr_warn("Signal event wasn't created because out of kernel memory\n"); - return -ENOMEM; + return ret; } p->signal_event_count++; - ev->user_signal_address = - &p->signal_page->user_address[ev->signal_slot_index]; - - ev->event_id = ev->signal_slot_index; - + ev->user_signal_address = &p->signal_page->user_address[ev->event_id]; pr_debug("Signal event number %zu created with id %d, address %p\n", p->signal_event_count, ev->event_id, ev->user_signal_address); @@ -252,16 +155,20 @@ static int create_signal_event(struct file *devkfd, return 0; } -/* - * No non-signal events are supported yet. - * We create them as events that never signal. - * Set event calls from user-mode are failed. - */ static int create_other_event(struct kfd_process *p, struct kfd_event *ev) { - ev->event_id = make_nonsignal_event_id(p); - if (ev->event_id == 0) - return -ENOMEM; + /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an + * intentional integer overflow to -1 without a compiler + * warning. idr_alloc treats a negative value as "maximum + * signed integer". + */ + int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID, + (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1, + GFP_KERNEL); + + if (id < 0) + return id; + ev->event_id = id; return 0; } @@ -269,9 +176,8 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev) void kfd_event_init_process(struct kfd_process *p) { mutex_init(&p->event_mutex); - hash_init(p->events); + idr_init(&p->event_idr); p->signal_page = NULL; - p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; p->signal_event_count = 0; } @@ -284,25 +190,22 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev) waiter->event = NULL; wake_up_all(&ev->wq); - if ((ev->type == KFD_EVENT_TYPE_SIGNAL || - ev->type == KFD_EVENT_TYPE_DEBUG) && p->signal_page) { - release_event_notification_slot(p->signal_page, - ev->signal_slot_index); + if (ev->type == KFD_EVENT_TYPE_SIGNAL || + ev->type == KFD_EVENT_TYPE_DEBUG) p->signal_event_count--; - } - hash_del(&ev->events); + idr_remove(&p->event_idr, ev->event_id); kfree(ev); } static void destroy_events(struct kfd_process *p) { struct kfd_event *ev; - struct hlist_node *tmp; - unsigned int hash_bkt; + uint32_t id; - hash_for_each_safe(p->events, hash_bkt, tmp, ev, events) + idr_for_each_entry(&p->event_idr, ev, id) destroy_event(p, ev); + idr_destroy(&p->event_idr); } /* @@ -365,7 +268,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, if (!ret) { *event_page_offset = KFD_MMAP_EVENTS_MASK; *event_page_offset <<= PAGE_SHIFT; - *event_slot_index = ev->signal_slot_index; + *event_slot_index = ev->event_id; } break; default: @@ -374,8 +277,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, } if (!ret) { - hash_add(p->events, &ev->events, ev->event_id); - *event_id = ev->event_id; *event_trigger_data = ev->event_id; } else { @@ -465,17 +366,7 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id) static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev) { - page_slots(p->signal_page)[ev->signal_slot_index] = - UNSIGNALED_EVENT_SLOT; -} - -static bool is_slot_signaled(struct kfd_process *p, unsigned int index) -{ - if (!p->signal_page) - return false; - else - return page_slots(p->signal_page)[index] != - UNSIGNALED_EVENT_SLOT; + page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT; } static void set_event_from_interrupt(struct kfd_process *p, @@ -514,13 +405,31 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, * ignore it, but we could use any bits we did receive to * search faster. */ - unsigned int i; + uint64_t *slots = page_slots(p->signal_page); + uint32_t id; + + if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) { + /* With relatively few events, it's faster to + * iterate over the event IDR + */ + idr_for_each_entry(&p->event_idr, ev, id) { + if (id >= KFD_SIGNAL_EVENT_LIMIT) + break; - for (i = 0; i < SLOTS_PER_PAGE; i++) - if (is_slot_signaled(p, i)) { - ev = lookup_event_by_page_slot(p, i); - set_event_from_interrupt(p, ev); + if (slots[id] != UNSIGNALED_EVENT_SLOT) + set_event_from_interrupt(p, ev); } + } else { + /* With relatively many events, it's faster to + * iterate over the signal slots and lookup + * only signaled events from the IDR. + */ + for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++) + if (slots[id] != UNSIGNALED_EVENT_SLOT) { + ev = lookup_event_by_id(p, id); + set_event_from_interrupt(p, ev); + } + } } mutex_unlock(&p->event_mutex); @@ -832,12 +741,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, { struct kfd_hsa_memory_exception_data *ev_data; struct kfd_event *ev; - int bkt; + uint32_t id; bool send_signal = true; ev_data = (struct kfd_hsa_memory_exception_data *) event_data; - hash_for_each(p->events, bkt, ev, events) + id = KFD_FIRST_NONSIGNAL_EVENT_ID; + idr_for_each_entry_continue(&p->event_idr, ev, id) if (ev->type == type) { send_signal = false; dev_dbg(kfd_device, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h index f85fcee..abca5bf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h @@ -31,9 +31,13 @@ #include "kfd_priv.h" #include <uapi/linux/kfd_ioctl.h> -#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U -#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK -#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX +/* + * IDR supports non-negative integer IDs. Small IDs are used for + * signal events to match their signal slot. Use the upper half of the + * ID space for non-signal events. + */ +#define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1) +#define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX /* * Written into kfd_signal_slot_t to indicate that the event is not signaled. @@ -47,9 +51,6 @@ struct kfd_event_waiter; struct signal_page; struct kfd_event { - /* All events in process, rooted at kfd_process.events. */ - struct hlist_node events; - u32 event_id; bool signaled; @@ -60,7 +61,6 @@ struct kfd_event { wait_queue_head_t wq; /* List of event waiters. */ /* Only for signal events. */ - unsigned int signal_slot_index; uint64_t __user *user_signal_address; /* type specific data */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index c1b3ee2..ebae8e1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -31,6 +31,7 @@ #include <linux/workqueue.h> #include <linux/spinlock.h> #include <linux/kfd_ioctl.h> +#include <linux/idr.h> #include <kgd_kfd_interface.h> #include "amd_shared.h" @@ -538,11 +539,10 @@ struct kfd_process { /* Event-related data */ struct mutex event_mutex; - /* All events in process hashed by ID, linked on kfd_event.events. */ - DECLARE_HASHTABLE(events, 4); + /* Event ID allocator and lookup */ + struct idr event_idr; /* Event page */ struct kfd_signal_page *signal_page; - u32 next_nonsignal_event_id; size_t signal_event_count; bool signal_event_limit_reached; }; -- 2.7.4