On Sat, Oct 21, 2017 at 3:23 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote: > Signal slots are identical to event IDs. > > Replace the used_slot_bitmap and events hash table with an IDR to > allocate and lookup event IDs and signal slots more efficiently. > > Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> > --- > drivers/gpu/drm/amd/amdkfd/kfd_events.c | 230 ++++++++++---------------------- > drivers/gpu/drm/amd/amdkfd/kfd_events.h | 14 +- > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 +- > 3 files changed, 80 insertions(+), 170 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c > index f800e48..cddd4b9 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c > @@ -41,24 +41,16 @@ struct kfd_event_waiter { > bool activated; /* Becomes true when event is signaled */ > }; > > -#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT > -#define SLOT_BITMAP_LONGS BITS_TO_LONGS(SLOTS_PER_PAGE) > - > /* > - * Over-complicated pooled allocator for event notification slots. > - * > * Each signal event needs a 64-bit signal slot where the signaler will write > - * a 1 before sending an interrupt.l (This is needed because some interrupts > + * a 1 before sending an interrupt. (This is needed because some interrupts > * do not contain enough spare data bits to identify an event.) > - * We get whole pages from vmalloc and map them to the process VA. > - * Individual signal events are then allocated a slot in a page. > + * We get whole pages and map them to the process VA. > + * Individual signal events use their event_id as slot index. > */ > - > struct kfd_signal_page { > uint64_t *kernel_address; > uint64_t __user *user_address; > - unsigned int free_slots; > - unsigned long used_slot_bitmap[SLOT_BITMAP_LONGS]; > }; > > /* > @@ -73,34 +65,6 @@ static uint64_t *page_slots(struct kfd_signal_page *page) > return page->kernel_address; > } > > -static bool allocate_free_slot(struct kfd_process *process, > - unsigned int *out_slot_index) > -{ > - struct kfd_signal_page *page = process->signal_page; > - unsigned int slot; > - > - if (!page || page->free_slots == 0) { > - pr_debug("No free event signal slots were found for process %p\n", > - process); > - > - return false; > - } > - > - slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE); > - > - __set_bit(slot, page->used_slot_bitmap); > - page->free_slots--; > - > - page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT; > - > - *out_slot_index = slot; > - > - pr_debug("Allocated event signal slot in page %p, slot %d\n", > - page, slot); > - > - return true; > -} > - > static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) > { > void *backing_store; > @@ -110,8 +74,6 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) > if (!page) > return NULL; > > - page->free_slots = SLOTS_PER_PAGE; > - > backing_store = (void *) __get_free_pages(GFP_KERNEL, > get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); > if (!backing_store) > @@ -132,28 +94,26 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) > return NULL; > } > > -static bool allocate_event_notification_slot(struct kfd_process *p, > - unsigned int *signal_slot_index) > +static int allocate_event_notification_slot(struct kfd_process *p, > + struct kfd_event *ev) > { > + int id; > + > if (!p->signal_page) { > p->signal_page = allocate_signal_page(p); > if (!p->signal_page) > - return false; > + return -ENOMEM; > } > > - return allocate_free_slot(p, signal_slot_index); > -} > + id = idr_alloc(&p->event_idr, ev, 0, KFD_SIGNAL_EVENT_LIMIT, > + GFP_KERNEL); > + if (id < 0) > + return id; > > -/* Assumes that the process's event_mutex is locked. */ > -static void release_event_notification_slot(struct kfd_signal_page *page, > - size_t slot_index) > -{ > - __clear_bit(slot_index, page->used_slot_bitmap); > - page->free_slots++; > + ev->event_id = id; > + page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT; > > - /* We don't free signal pages, they are retained by the process > - * and reused until it exits. > - */ > + return 0; > } > > /* > @@ -162,89 +122,32 @@ static void release_event_notification_slot(struct kfd_signal_page *page, > */ > static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) > { > - struct kfd_event *ev; > - > - hash_for_each_possible(p->events, ev, events, id) > - if (ev->event_id == id) > - return ev; > - > - return NULL; > -} > - > -/* > - * Produce a kfd event id for a nonsignal event. > - * These are arbitrary numbers, so we do a sequential search through > - * the hash table for an unused number. > - */ > -static u32 make_nonsignal_event_id(struct kfd_process *p) > -{ > - u32 id; > - > - for (id = p->next_nonsignal_event_id; > - id < KFD_LAST_NONSIGNAL_EVENT_ID && > - lookup_event_by_id(p, id); > - id++) > - ; > - > - if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { > - > - /* > - * What if id == LAST_NONSIGNAL_EVENT_ID - 1? > - * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so > - * the first loop fails immediately and we proceed with the > - * wraparound loop below. > - */ > - p->next_nonsignal_event_id = id + 1; > - > - return id; > - } > - > - for (id = KFD_FIRST_NONSIGNAL_EVENT_ID; > - id < KFD_LAST_NONSIGNAL_EVENT_ID && > - lookup_event_by_id(p, id); > - id++) > - ; > - > - > - if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { > - p->next_nonsignal_event_id = id + 1; > - return id; > - } > - > - p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; > - return 0; > -} > - > -static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p, > - unsigned int signal_slot) > -{ > - return lookup_event_by_id(p, signal_slot); > + return idr_find(&p->event_idr, id); > } > > static int create_signal_event(struct file *devkfd, > struct kfd_process *p, > struct kfd_event *ev) > { > + int ret; > + > if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) { > if (!p->signal_event_limit_reached) { > pr_warn("Signal event wasn't created because limit was reached\n"); > p->signal_event_limit_reached = true; > } > - return -ENOMEM; > + return -ENOSPC; > } > > - if (!allocate_event_notification_slot(p, &ev->signal_slot_index)) { > + ret = allocate_event_notification_slot(p, ev); > + if (ret) { > pr_warn("Signal event wasn't created because out of kernel memory\n"); > - return -ENOMEM; > + return ret; > } > > p->signal_event_count++; > > - ev->user_signal_address = > - &p->signal_page->user_address[ev->signal_slot_index]; > - > - ev->event_id = ev->signal_slot_index; > - > + ev->user_signal_address = &p->signal_page->user_address[ev->event_id]; > pr_debug("Signal event number %zu created with id %d, address %p\n", > p->signal_event_count, ev->event_id, > ev->user_signal_address); > @@ -252,16 +155,20 @@ static int create_signal_event(struct file *devkfd, > return 0; > } > > -/* > - * No non-signal events are supported yet. > - * We create them as events that never signal. > - * Set event calls from user-mode are failed. > - */ > static int create_other_event(struct kfd_process *p, struct kfd_event *ev) > { > - ev->event_id = make_nonsignal_event_id(p); > - if (ev->event_id == 0) > - return -ENOMEM; > + /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an > + * intentional integer overflow to -1 without a compiler > + * warning. idr_alloc treats a negative value as "maximum > + * signed integer". > + */ > + int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID, > + (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1, > + GFP_KERNEL); > + > + if (id < 0) > + return id; > + ev->event_id = id; > > return 0; > } > @@ -269,9 +176,8 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev) > void kfd_event_init_process(struct kfd_process *p) > { > mutex_init(&p->event_mutex); > - hash_init(p->events); > + idr_init(&p->event_idr); > p->signal_page = NULL; > - p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; > p->signal_event_count = 0; > } > > @@ -284,25 +190,22 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev) > waiter->event = NULL; > wake_up_all(&ev->wq); > > - if ((ev->type == KFD_EVENT_TYPE_SIGNAL || > - ev->type == KFD_EVENT_TYPE_DEBUG) && p->signal_page) { > - release_event_notification_slot(p->signal_page, > - ev->signal_slot_index); > + if (ev->type == KFD_EVENT_TYPE_SIGNAL || > + ev->type == KFD_EVENT_TYPE_DEBUG) > p->signal_event_count--; > - } > > - hash_del(&ev->events); > + idr_remove(&p->event_idr, ev->event_id); > kfree(ev); > } > > static void destroy_events(struct kfd_process *p) > { > struct kfd_event *ev; > - struct hlist_node *tmp; > - unsigned int hash_bkt; > + uint32_t id; > > - hash_for_each_safe(p->events, hash_bkt, tmp, ev, events) > + idr_for_each_entry(&p->event_idr, ev, id) > destroy_event(p, ev); > + idr_destroy(&p->event_idr); > } > > /* > @@ -365,7 +268,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, > if (!ret) { > *event_page_offset = KFD_MMAP_EVENTS_MASK; > *event_page_offset <<= PAGE_SHIFT; > - *event_slot_index = ev->signal_slot_index; > + *event_slot_index = ev->event_id; > } > break; > default: > @@ -374,8 +277,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, > } > > if (!ret) { > - hash_add(p->events, &ev->events, ev->event_id); > - > *event_id = ev->event_id; > *event_trigger_data = ev->event_id; > } else { > @@ -465,17 +366,7 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id) > > static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev) > { > - page_slots(p->signal_page)[ev->signal_slot_index] = > - UNSIGNALED_EVENT_SLOT; > -} > - > -static bool is_slot_signaled(struct kfd_process *p, unsigned int index) > -{ > - if (!p->signal_page) > - return false; > - else > - return page_slots(p->signal_page)[index] != > - UNSIGNALED_EVENT_SLOT; > + page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT; > } > > static void set_event_from_interrupt(struct kfd_process *p, > @@ -514,13 +405,31 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, > * ignore it, but we could use any bits we did receive to > * search faster. > */ > - unsigned int i; > + uint64_t *slots = page_slots(p->signal_page); > + uint32_t id; > + > + if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) { > + /* With relatively few events, it's faster to > + * iterate over the event IDR > + */ > + idr_for_each_entry(&p->event_idr, ev, id) { > + if (id >= KFD_SIGNAL_EVENT_LIMIT) > + break; > > - for (i = 0; i < SLOTS_PER_PAGE; i++) > - if (is_slot_signaled(p, i)) { > - ev = lookup_event_by_page_slot(p, i); > - set_event_from_interrupt(p, ev); > + if (slots[id] != UNSIGNALED_EVENT_SLOT) > + set_event_from_interrupt(p, ev); > } > + } else { > + /* With relatively many events, it's faster to > + * iterate over the signal slots and lookup > + * only signaled events from the IDR. > + */ > + for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++) > + if (slots[id] != UNSIGNALED_EVENT_SLOT) { > + ev = lookup_event_by_id(p, id); > + set_event_from_interrupt(p, ev); > + } > + } > } > > mutex_unlock(&p->event_mutex); > @@ -832,12 +741,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, > { > struct kfd_hsa_memory_exception_data *ev_data; > struct kfd_event *ev; > - int bkt; > + uint32_t id; > bool send_signal = true; > > ev_data = (struct kfd_hsa_memory_exception_data *) event_data; > > - hash_for_each(p->events, bkt, ev, events) > + id = KFD_FIRST_NONSIGNAL_EVENT_ID; > + idr_for_each_entry_continue(&p->event_idr, ev, id) > if (ev->type == type) { > send_signal = false; > dev_dbg(kfd_device, > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h > index f85fcee..abca5bf 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h > @@ -31,9 +31,13 @@ > #include "kfd_priv.h" > #include <uapi/linux/kfd_ioctl.h> > > -#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U > -#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK > -#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX > +/* > + * IDR supports non-negative integer IDs. Small IDs are used for > + * signal events to match their signal slot. Use the upper half of the > + * ID space for non-signal events. > + */ > +#define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1) > +#define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX > > /* > * Written into kfd_signal_slot_t to indicate that the event is not signaled. > @@ -47,9 +51,6 @@ struct kfd_event_waiter; > struct signal_page; > > struct kfd_event { > - /* All events in process, rooted at kfd_process.events. */ > - struct hlist_node events; > - > u32 event_id; > > bool signaled; > @@ -60,7 +61,6 @@ struct kfd_event { > wait_queue_head_t wq; /* List of event waiters. */ > > /* Only for signal events. */ > - unsigned int signal_slot_index; > uint64_t __user *user_signal_address; > > /* type specific data */ > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > index c1b3ee2..ebae8e1 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > @@ -31,6 +31,7 @@ > #include <linux/workqueue.h> > #include <linux/spinlock.h> > #include <linux/kfd_ioctl.h> > +#include <linux/idr.h> > #include <kgd_kfd_interface.h> > > #include "amd_shared.h" > @@ -538,11 +539,10 @@ struct kfd_process { > > /* Event-related data */ > struct mutex event_mutex; > - /* All events in process hashed by ID, linked on kfd_event.events. */ > - DECLARE_HASHTABLE(events, 4); > + /* Event ID allocator and lookup */ > + struct idr event_idr; > /* Event page */ > struct kfd_signal_page *signal_page; > - u32 next_nonsignal_event_id; > size_t signal_event_count; > bool signal_event_limit_reached; > }; > -- > 2.7.4 > This patch is: Acked-by: Oded Gabbay <oded.gabbay at gmail.com>