The first event page is always big enough to handle all events. Handling of multiple events pages is not supported by user mode, and not necessary. Signed-off-by: Yong Zhao <yong.zhao at amd.com> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> Acked-by: Oded Gabbay <oded.gabbay at gmail.com> --- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 197 +++++++++++--------------------- drivers/gpu/drm/amd/amdkfd/kfd_events.h | 1 - drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 +- 3 files changed, 70 insertions(+), 132 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 7dae26f..7cc1710 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -41,6 +41,9 @@ struct kfd_event_waiter { bool activated; /* Becomes true when event is signaled */ }; +#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT +#define SLOT_BITMAP_LONGS BITS_TO_LONGS(SLOTS_PER_PAGE) + /* * Over-complicated pooled allocator for event notification slots. * @@ -51,132 +54,98 @@ struct kfd_event_waiter { * Individual signal events are then allocated a slot in a page. */ -struct signal_page { - struct list_head event_pages; /* kfd_process.signal_event_pages */ +struct kfd_signal_page { uint64_t *kernel_address; uint64_t __user *user_address; - uint32_t page_index; /* Index into the mmap aperture. */ unsigned int free_slots; - unsigned long used_slot_bitmap[0]; + unsigned long used_slot_bitmap[SLOT_BITMAP_LONGS]; }; -#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT -#define SLOT_BITMAP_SIZE BITS_TO_LONGS(SLOTS_PER_PAGE) -#define BITS_PER_PAGE (ilog2(SLOTS_PER_PAGE)+1) -#define SIGNAL_PAGE_SIZE (sizeof(struct signal_page) + \ - SLOT_BITMAP_SIZE * sizeof(long)) - /* * For signal events, the event ID is used as the interrupt user data. * For SQ s_sendmsg interrupts, this is limited to 8 bits. */ #define INTERRUPT_DATA_BITS 8 -#define SIGNAL_EVENT_ID_SLOT_SHIFT 0 -static uint64_t *page_slots(struct signal_page *page) +static uint64_t *page_slots(struct kfd_signal_page *page) { return page->kernel_address; } static bool allocate_free_slot(struct kfd_process *process, - struct signal_page **out_page, - unsigned int *out_slot_index) + unsigned int *out_slot_index) { - struct signal_page *page; + struct kfd_signal_page *page = process->signal_page; + unsigned int slot; - list_for_each_entry(page, &process->signal_event_pages, event_pages) { - if (page->free_slots > 0) { - unsigned int slot = - find_first_zero_bit(page->used_slot_bitmap, - SLOTS_PER_PAGE); + if (!page || page->free_slots == 0) { + pr_debug("No free event signal slots were found for process %p\n", + process); - __set_bit(slot, page->used_slot_bitmap); - page->free_slots--; + return false; + } - page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT; + slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE); - *out_page = page; - *out_slot_index = slot; + __set_bit(slot, page->used_slot_bitmap); + page->free_slots--; - pr_debug("Allocated event signal slot in page %p, slot %d\n", - page, slot); + page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT; - return true; - } - } + *out_slot_index = slot; - pr_debug("No free event signal slots were found for process %p\n", - process); + pr_debug("Allocated event signal slot in page %p, slot %d\n", + page, slot); - return false; + return true; } -#define list_tail_entry(head, type, member) \ - list_entry((head)->prev, type, member) - -static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p) +static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) { void *backing_store; - struct signal_page *page; + struct kfd_signal_page *page; - page = kzalloc(SIGNAL_PAGE_SIZE, GFP_KERNEL); + page = kzalloc(sizeof(*page), GFP_KERNEL); if (!page) - goto fail_alloc_signal_page; + return NULL; page->free_slots = SLOTS_PER_PAGE; - backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, + backing_store = (void *) __get_free_pages(GFP_KERNEL, get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); if (!backing_store) goto fail_alloc_signal_store; - /* prevent user-mode info leaks */ + /* Initialize all events to unsignaled */ memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT, - KFD_SIGNAL_EVENT_LIMIT * 8); + KFD_SIGNAL_EVENT_LIMIT * 8); page->kernel_address = backing_store; - - if (list_empty(&p->signal_event_pages)) - page->page_index = 0; - else - page->page_index = list_tail_entry(&p->signal_event_pages, - struct signal_page, - event_pages)->page_index + 1; - pr_debug("Allocated new event signal page at %p, for process %p\n", page, p); - pr_debug("Page index is %d\n", page->page_index); - - list_add(&page->event_pages, &p->signal_event_pages); - return true; + return page; fail_alloc_signal_store: kfree(page); -fail_alloc_signal_page: - return false; + return NULL; } -static bool allocate_event_notification_slot(struct file *devkfd, - struct kfd_process *p, - struct signal_page **page, - unsigned int *signal_slot_index) +static bool allocate_event_notification_slot(struct kfd_process *p, + unsigned int *signal_slot_index) { - bool ret; - - ret = allocate_free_slot(p, page, signal_slot_index); - if (!ret) { - ret = allocate_signal_page(devkfd, p); - if (ret) - ret = allocate_free_slot(p, page, signal_slot_index); + if (!p->signal_page) { + p->signal_page = allocate_signal_page(p); + if (!p->signal_page) + return false; } - return ret; + return allocate_free_slot(p, signal_slot_index); } /* Assumes that the process's event_mutex is locked. */ -static void release_event_notification_slot(struct signal_page *page, +static void release_event_notification_slot(struct kfd_signal_page *page, size_t slot_index) { __clear_bit(slot_index, page->used_slot_bitmap); @@ -187,22 +156,6 @@ static void release_event_notification_slot(struct signal_page *page, */ } -static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, - unsigned int page_index) -{ - struct signal_page *page; - - /* - * This is safe because we don't delete signal pages until the - * process exits. - */ - list_for_each_entry(page, &p->signal_event_pages, event_pages) - if (page->page_index == page_index) - return page; - - return NULL; -} - /* * Assumes that p->event_mutex is held and of course that p is not going * away (current or locked). @@ -218,13 +171,6 @@ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) return NULL; } -static u32 make_signal_event_id(struct signal_page *page, - unsigned int signal_slot_index) -{ - return page->page_index | - (signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT); -} - /* * Produce a kfd event id for a nonsignal event. * These are arbitrary numbers, so we do a sequential search through @@ -270,10 +216,9 @@ static u32 make_nonsignal_event_id(struct kfd_process *p) } static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p, - struct signal_page *page, unsigned int signal_slot) { - return lookup_event_by_id(p, make_signal_event_id(page, signal_slot)); + return lookup_event_by_id(p, signal_slot); } static int create_signal_event(struct file *devkfd, @@ -288,8 +233,7 @@ static int create_signal_event(struct file *devkfd, return -ENOMEM; } - if (!allocate_event_notification_slot(devkfd, p, &ev->signal_page, - &ev->signal_slot_index)) { + if (!allocate_event_notification_slot(p, &ev->signal_slot_index)) { pr_warn("Signal event wasn't created because out of kernel memory\n"); return -ENOMEM; } @@ -297,10 +241,9 @@ static int create_signal_event(struct file *devkfd, p->signal_event_count++; ev->user_signal_address = - &ev->signal_page->user_address[ev->signal_slot_index]; + &p->signal_page->user_address[ev->signal_slot_index]; - ev->event_id = make_signal_event_id(ev->signal_page, - ev->signal_slot_index); + ev->event_id = ev->signal_slot_index; pr_debug("Signal event number %zu created with id %d, address %p\n", p->signal_event_count, ev->event_id, @@ -327,7 +270,7 @@ void kfd_event_init_process(struct kfd_process *p) { mutex_init(&p->event_mutex); hash_init(p->events); - INIT_LIST_HEAD(&p->signal_event_pages); + p->signal_page = NULL; p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; p->signal_event_count = 0; } @@ -341,8 +284,9 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev) waiter->event = NULL; wake_up_all(&ev->wq); - if (ev->signal_page) { - release_event_notification_slot(ev->signal_page, + if ((ev->type == KFD_EVENT_TYPE_SIGNAL || + ev->type == KFD_EVENT_TYPE_DEBUG) && p->signal_page) { + release_event_notification_slot(p->signal_page, ev->signal_slot_index); p->signal_event_count--; } @@ -365,12 +309,11 @@ static void destroy_events(struct kfd_process *p) * We assume that the process is being destroyed and there is no need to * unmap the pages or keep bookkeeping data in order. */ -static void shutdown_signal_pages(struct kfd_process *p) +static void shutdown_signal_page(struct kfd_process *p) { - struct signal_page *page, *tmp; + struct kfd_signal_page *page = p->signal_page; - list_for_each_entry_safe(page, tmp, &p->signal_event_pages, - event_pages) { + if (page) { free_pages((unsigned long)page->kernel_address, get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); kfree(page); @@ -380,7 +323,7 @@ static void shutdown_signal_pages(struct kfd_process *p) void kfd_event_free_process(struct kfd_process *p) { destroy_events(p); - shutdown_signal_pages(p); + shutdown_signal_page(p); } static bool event_can_be_gpu_signaled(const struct kfd_event *ev) @@ -420,8 +363,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, case KFD_EVENT_TYPE_DEBUG: ret = create_signal_event(devkfd, p, ev); if (!ret) { - *event_page_offset = (ev->signal_page->page_index | - KFD_MMAP_EVENTS_MASK); + *event_page_offset = KFD_MMAP_EVENTS_MASK; *event_page_offset <<= PAGE_SHIFT; *event_slot_index = ev->signal_slot_index; } @@ -527,13 +469,17 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id) static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev) { - page_slots(ev->signal_page)[ev->signal_slot_index] = + page_slots(p->signal_page)[ev->signal_slot_index] = UNSIGNALED_EVENT_SLOT; } -static bool is_slot_signaled(struct signal_page *page, unsigned int index) +static bool is_slot_signaled(struct kfd_process *p, unsigned int index) { - return page_slots(page)[index] != UNSIGNALED_EVENT_SLOT; + if (!p->signal_page) + return false; + else + return page_slots(p->signal_page)[index] != + UNSIGNALED_EVENT_SLOT; } static void set_event_from_interrupt(struct kfd_process *p, @@ -566,22 +512,19 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, /* Partial ID is a full ID. */ ev = lookup_event_by_id(p, partial_id); set_event_from_interrupt(p, ev); - } else { + } else if (p->signal_page) { /* * Partial ID is in fact partial. For now we completely * ignore it, but we could use any bits we did receive to * search faster. */ - struct signal_page *page; unsigned int i; - list_for_each_entry(page, &p->signal_event_pages, event_pages) - for (i = 0; i < SLOTS_PER_PAGE; i++) - if (is_slot_signaled(page, i)) { - ev = lookup_event_by_page_slot(p, - page, i); - set_event_from_interrupt(p, ev); - } + for (i = 0; i < SLOTS_PER_PAGE; i++) + if (is_slot_signaled(p, i)) { + ev = lookup_event_by_page_slot(p, i); + set_event_from_interrupt(p, ev); + } } mutex_unlock(&p->event_mutex); @@ -846,9 +789,8 @@ int kfd_wait_on_events(struct kfd_process *p, int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) { - unsigned int page_index; unsigned long pfn; - struct signal_page *page; + struct kfd_signal_page *page; /* check required size is logical */ if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) != @@ -857,13 +799,10 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) return -EINVAL; } - page_index = vma->vm_pgoff; - - page = lookup_signal_page_by_index(p, page_index); + page = p->signal_page; if (!page) { /* Probably KFD bug, but mmap is user-accessible. */ - pr_debug("Signal page could not be found for page_index %u\n", - page_index); + pr_debug("Signal page could not be found\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h index 96f9122..f85fcee 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h @@ -60,7 +60,6 @@ struct kfd_event { wait_queue_head_t wq; /* List of event waiters. */ /* Only for signal events. */ - struct signal_page *signal_page; unsigned int signal_slot_index; uint64_t __user *user_signal_address; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index d3cf53a..c1b3ee2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -540,8 +540,8 @@ struct kfd_process { struct mutex event_mutex; /* All events in process hashed by ID, linked on kfd_event.events. */ DECLARE_HASHTABLE(events, 4); - /* struct slot_page_header.event_pages */ - struct list_head signal_event_pages; + /* Event page */ + struct kfd_signal_page *signal_page; u32 next_nonsignal_event_id; size_t signal_event_count; bool signal_event_limit_reached; -- 2.7.4