On Sat, Oct 21, 2017 at 3:23 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote:
> The first event page is always big enough to handle all events.
> Handling of multiple event pages is not supported by user mode, and
> not necessary.
>
> Signed-off-by: Yong Zhao <yong.zhao at amd.com>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_events.c | 197 +++++++++++---------------------
>  drivers/gpu/drm/amd/amdkfd/kfd_events.h |   1 -
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |   4 +-
>  3 files changed, 70 insertions(+), 132 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index f178248..f800e48 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -41,6 +41,9 @@ struct kfd_event_waiter {
>          bool activated; /* Becomes true when event is signaled */
>  };
>
> +#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
> +#define SLOT_BITMAP_LONGS BITS_TO_LONGS(SLOTS_PER_PAGE)
> +
>  /*
>   * Over-complicated pooled allocator for event notification slots.
>   *
> @@ -51,132 +54,98 @@ struct kfd_event_waiter {
>   * Individual signal events are then allocated a slot in a page.
>   */
>
> -struct signal_page {
> -        struct list_head event_pages;   /* kfd_process.signal_event_pages */
> +struct kfd_signal_page {
>          uint64_t *kernel_address;
>          uint64_t __user *user_address;
> -        uint32_t page_index;            /* Index into the mmap aperture. */
>          unsigned int free_slots;
> -        unsigned long used_slot_bitmap[0];
> +        unsigned long used_slot_bitmap[SLOT_BITMAP_LONGS];
>  };
>
> -#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
> -#define SLOT_BITMAP_SIZE BITS_TO_LONGS(SLOTS_PER_PAGE)
> -#define BITS_PER_PAGE (ilog2(SLOTS_PER_PAGE)+1)
> -#define SIGNAL_PAGE_SIZE (sizeof(struct signal_page) + \
> -                                SLOT_BITMAP_SIZE * sizeof(long))
> -
>  /*
>   * For signal events, the event ID is used as the interrupt user data.
>   * For SQ s_sendmsg interrupts, this is limited to 8 bits.
>   */
>
>  #define INTERRUPT_DATA_BITS 12
> -#define SIGNAL_EVENT_ID_SLOT_SHIFT 0
>
> -static uint64_t *page_slots(struct signal_page *page)
> +static uint64_t *page_slots(struct kfd_signal_page *page)
>  {
>          return page->kernel_address;
>  }
>
>  static bool allocate_free_slot(struct kfd_process *process,
> -                               struct signal_page **out_page,
> -                               unsigned int *out_slot_index)
> +                               unsigned int *out_slot_index)
>  {
> -        struct signal_page *page;
> +        struct kfd_signal_page *page = process->signal_page;
> +        unsigned int slot;
>
> -        list_for_each_entry(page, &process->signal_event_pages, event_pages) {
> -                if (page->free_slots > 0) {
> -                        unsigned int slot =
> -                                find_first_zero_bit(page->used_slot_bitmap,
> -                                                    SLOTS_PER_PAGE);
> +        if (!page || page->free_slots == 0) {
> +                pr_debug("No free event signal slots were found for process %p\n",
> +                         process);
>
> -                        __set_bit(slot, page->used_slot_bitmap);
> -                        page->free_slots--;
> +                return false;
> +        }
>
> -                        page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT;
> +        slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE);
>
> -                        *out_page = page;
> -                        *out_slot_index = slot;
> +        __set_bit(slot, page->used_slot_bitmap);
> +        page->free_slots--;
>
> -                        pr_debug("Allocated event signal slot in page %p, slot %d\n",
> -                                 page, slot);
> +        page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT;
>
> -                        return true;
> -                }
> -        }
> +        *out_slot_index = slot;
>
> -        pr_debug("No free event signal slots were found for process %p\n",
> -                 process);
> +        pr_debug("Allocated event signal slot in page %p, slot %d\n",
> +                 page, slot);
>
> -        return false;
> +        return true;
>  }
>
> -#define list_tail_entry(head, type, member) \
> -        list_entry((head)->prev, type, member)
> -
> -static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p)
> +static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
>  {
>          void *backing_store;
> -        struct signal_page *page;
> +        struct kfd_signal_page *page;
>
> -        page = kzalloc(SIGNAL_PAGE_SIZE, GFP_KERNEL);
> +        page = kzalloc(sizeof(*page), GFP_KERNEL);
>          if (!page)
> -                goto fail_alloc_signal_page;
> +                return NULL;
>
>          page->free_slots = SLOTS_PER_PAGE;
>
> -        backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
> +        backing_store = (void *) __get_free_pages(GFP_KERNEL,
>                          get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
>          if (!backing_store)
>                  goto fail_alloc_signal_store;
>
> -        /* prevent user-mode info leaks */
> +        /* Initialize all events to unsignaled */
>          memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT,
> -                KFD_SIGNAL_EVENT_LIMIT * 8);
> +               KFD_SIGNAL_EVENT_LIMIT * 8);
>
>          page->kernel_address = backing_store;
> -
> -        if (list_empty(&p->signal_event_pages))
> -                page->page_index = 0;
> -        else
> -                page->page_index = list_tail_entry(&p->signal_event_pages,
> -                                                   struct signal_page,
> -                                                   event_pages)->page_index + 1;
> -
>          pr_debug("Allocated new event signal page at %p, for process %p\n",
>                   page, p);
> -        pr_debug("Page index is %d\n", page->page_index);
> -
> -        list_add(&page->event_pages, &p->signal_event_pages);
>
> -        return true;
> +        return page;
>
>  fail_alloc_signal_store:
>          kfree(page);
> -fail_alloc_signal_page:
> -        return false;
> +        return NULL;
>  }
>
> -static bool allocate_event_notification_slot(struct file *devkfd,
> -                                             struct kfd_process *p,
> -                                             struct signal_page **page,
> -                                             unsigned int *signal_slot_index)
> +static bool allocate_event_notification_slot(struct kfd_process *p,
> +                                             unsigned int *signal_slot_index)
>  {
> -        bool ret;
> -
> -        ret = allocate_free_slot(p, page, signal_slot_index);
> -        if (!ret) {
> -                ret = allocate_signal_page(devkfd, p);
> -                if (ret)
> -                        ret = allocate_free_slot(p, page, signal_slot_index);
> +        if (!p->signal_page) {
> +                p->signal_page = allocate_signal_page(p);
> +                if (!p->signal_page)
> +                        return false;
>          }
>
> -        return ret;
> +        return allocate_free_slot(p, signal_slot_index);
>  }
>
>  /* Assumes that the process's event_mutex is locked. */
> -static void release_event_notification_slot(struct signal_page *page,
> +static void release_event_notification_slot(struct kfd_signal_page *page,
>                                              size_t slot_index)
>  {
>          __clear_bit(slot_index, page->used_slot_bitmap);
> @@ -187,22 +156,6 @@ static void release_event_notification_slot(struct signal_page *page,
>           */
>  }
>
> -static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p,
> -                                                       unsigned int page_index)
> -{
> -        struct signal_page *page;
> -
> -        /*
> -         * This is safe because we don't delete signal pages until the
> -         * process exits.
> -         */
> -        list_for_each_entry(page, &p->signal_event_pages, event_pages)
> -                if (page->page_index == page_index)
> -                        return page;
> -
> -        return NULL;
> -}
> -
>  /*
>   * Assumes that p->event_mutex is held and of course that p is not going
>   * away (current or locked).
> @@ -218,13 +171,6 @@ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
>          return NULL;
>  }
>
> -static u32 make_signal_event_id(struct signal_page *page,
> -                                unsigned int signal_slot_index)
> -{
> -        return page->page_index |
> -                (signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT);
> -}
> -
>  /*
>   * Produce a kfd event id for a nonsignal event.
>   * These are arbitrary numbers, so we do a sequential search through
> @@ -270,10 +216,9 @@ static u32 make_nonsignal_event_id(struct kfd_process *p)
>  }
>
>  static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p,
> -                                                   struct signal_page *page,
>                                                     unsigned int signal_slot)
>  {
> -        return lookup_event_by_id(p, make_signal_event_id(page, signal_slot));
> +        return lookup_event_by_id(p, signal_slot);
>  }
>
>  static int create_signal_event(struct file *devkfd,
> @@ -288,8 +233,7 @@ static int create_signal_event(struct file *devkfd,
>                  return -ENOMEM;
>          }
>
> -        if (!allocate_event_notification_slot(devkfd, p, &ev->signal_page,
> -                                              &ev->signal_slot_index)) {
> +        if (!allocate_event_notification_slot(p, &ev->signal_slot_index)) {
>                  pr_warn("Signal event wasn't created because out of kernel memory\n");
>                  return -ENOMEM;
>          }
> @@ -297,10 +241,9 @@ static int create_signal_event(struct file *devkfd,
>          p->signal_event_count++;
>
>          ev->user_signal_address =
> -                        &ev->signal_page->user_address[ev->signal_slot_index];
> +                        &p->signal_page->user_address[ev->signal_slot_index];
>
> -        ev->event_id = make_signal_event_id(ev->signal_page,
> -                                            ev->signal_slot_index);
> +        ev->event_id = ev->signal_slot_index;
>
>          pr_debug("Signal event number %zu created with id %d, address %p\n",
>                   p->signal_event_count, ev->event_id,
> @@ -327,7 +270,7 @@ void kfd_event_init_process(struct kfd_process *p)
>  {
>          mutex_init(&p->event_mutex);
>          hash_init(p->events);
> -        INIT_LIST_HEAD(&p->signal_event_pages);
> +        p->signal_page = NULL;
>          p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
>          p->signal_event_count = 0;
>  }
> @@ -341,8 +284,9 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
>          waiter->event = NULL;
>          wake_up_all(&ev->wq);
>
> -        if (ev->signal_page) {
> -                release_event_notification_slot(ev->signal_page,
> +        if ((ev->type == KFD_EVENT_TYPE_SIGNAL ||
> +             ev->type == KFD_EVENT_TYPE_DEBUG) && p->signal_page) {
> +                release_event_notification_slot(p->signal_page,
>                                                  ev->signal_slot_index);
>                  p->signal_event_count--;
>          }
> @@ -365,12 +309,11 @@ static void destroy_events(struct kfd_process *p)
>   * We assume that the process is being destroyed and there is no need to
>   * unmap the pages or keep bookkeeping data in order.
>   */
> -static void shutdown_signal_pages(struct kfd_process *p)
> +static void shutdown_signal_page(struct kfd_process *p)
>  {
> -        struct signal_page *page, *tmp;
> +        struct kfd_signal_page *page = p->signal_page;
>
> -        list_for_each_entry_safe(page, tmp, &p->signal_event_pages,
> -                                 event_pages) {
> +        if (page) {
>                  free_pages((unsigned long)page->kernel_address,
>                             get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
>                  kfree(page);
> @@ -380,7 +323,7 @@ static void shutdown_signal_pages(struct kfd_process *p)
>  void kfd_event_free_process(struct kfd_process *p)
>  {
>          destroy_events(p);
> -        shutdown_signal_pages(p);
> +        shutdown_signal_page(p);
>  }
>
>  static bool event_can_be_gpu_signaled(const struct kfd_event *ev)
> @@ -420,8 +363,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
>          case KFD_EVENT_TYPE_DEBUG:
>                  ret = create_signal_event(devkfd, p, ev);
>                  if (!ret) {
> -                        *event_page_offset = (ev->signal_page->page_index |
> -                                              KFD_MMAP_EVENTS_MASK);
> +                        *event_page_offset = KFD_MMAP_EVENTS_MASK;
>                          *event_page_offset <<= PAGE_SHIFT;
>                          *event_slot_index = ev->signal_slot_index;
>                  }
> @@ -523,13 +465,17 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
>
>  static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
>  {
> -        page_slots(ev->signal_page)[ev->signal_slot_index] =
> +        page_slots(p->signal_page)[ev->signal_slot_index] =
>                          UNSIGNALED_EVENT_SLOT;
>  }
>
> -static bool is_slot_signaled(struct signal_page *page, unsigned int index)
> +static bool is_slot_signaled(struct kfd_process *p, unsigned int index)
>  {
> -        return page_slots(page)[index] != UNSIGNALED_EVENT_SLOT;
> +        if (!p->signal_page)
> +                return false;
> +        else
> +                return page_slots(p->signal_page)[index] !=
> +                        UNSIGNALED_EVENT_SLOT;
>  }
>
>  static void set_event_from_interrupt(struct kfd_process *p,
> @@ -562,22 +508,19 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
>                  /* Partial ID is a full ID. */
>                  ev = lookup_event_by_id(p, partial_id);
>                  set_event_from_interrupt(p, ev);
> -        } else {
> +        } else if (p->signal_page) {
>                  /*
>                   * Partial ID is in fact partial. For now we completely
>                   * ignore it, but we could use any bits we did receive to
>                   * search faster.
>                   */
> -                struct signal_page *page;
>                  unsigned int i;
>
> -                list_for_each_entry(page, &p->signal_event_pages, event_pages)
> -                        for (i = 0; i < SLOTS_PER_PAGE; i++)
> -                                if (is_slot_signaled(page, i)) {
> -                                        ev = lookup_event_by_page_slot(p,
> -                                                        page, i);
> -                                        set_event_from_interrupt(p, ev);
> -                                }
> +                for (i = 0; i < SLOTS_PER_PAGE; i++)
> +                        if (is_slot_signaled(p, i)) {
> +                                ev = lookup_event_by_page_slot(p, i);
> +                                set_event_from_interrupt(p, ev);
> +                        }
>          }
>
>          mutex_unlock(&p->event_mutex);
> @@ -842,9 +785,8 @@ int kfd_wait_on_events(struct kfd_process *p,
>  int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
>  {
>
> -        unsigned int page_index;
>          unsigned long pfn;
> -        struct signal_page *page;
> +        struct kfd_signal_page *page;
>
>          /* check required size is logical */
>          if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) !=
> @@ -853,13 +795,10 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
>                  return -EINVAL;
>          }
>
> -        page_index = vma->vm_pgoff;
> -
> -        page = lookup_signal_page_by_index(p, page_index);
> +        page = p->signal_page;
>          if (!page) {
>                  /* Probably KFD bug, but mmap is user-accessible. */
> -                pr_debug("Signal page could not be found for page_index %u\n",
> -                         page_index);
> +                pr_debug("Signal page could not be found\n");
>                  return -EINVAL;
>          }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
> index 96f9122..f85fcee 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
> @@ -60,7 +60,6 @@ struct kfd_event {
>          wait_queue_head_t wq; /* List of event waiters. */
>
>          /* Only for signal events. */
> -        struct signal_page *signal_page;
>          unsigned int signal_slot_index;
>          uint64_t __user *user_signal_address;
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index d3cf53a..c1b3ee2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -540,8 +540,8 @@ struct kfd_process {
>          struct mutex event_mutex;
>          /* All events in process hashed by ID, linked on kfd_event.events. */
>          DECLARE_HASHTABLE(events, 4);
> -        /* struct slot_page_header.event_pages */
> -        struct list_head signal_event_pages;
> +        /* Event page */
> +        struct kfd_signal_page *signal_page;
>          u32 next_nonsignal_event_id;
>          size_t signal_event_count;
>          bool signal_event_limit_reached;
> --
> 2.7.4
>

This patch is:
Acked-by: Oded Gabbay <oded.gabbay at gmail.com>
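
A side note for anyone reading the diff without the rest of kfd_events.c handy: the payoff of dropping multi-page support is that make_signal_event_id() and the page_index bookkeeping disappear entirely, and a signal event's ID becomes simply its slot index in the one per-process page. Below is a minimal user-space sketch of that shape. To be clear, this is not the in-tree code: the kernel bitmap helpers are replaced with plain loops, the mmapped backing store with a static array, and the slot count of 256 is only an illustrative stand-in for KFD_SIGNAL_EVENT_LIMIT.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SLOTS_PER_PAGE 256      /* illustrative stand-in for KFD_SIGNAL_EVENT_LIMIT */
#define UNSIGNALED_EVENT_SLOT ((uint64_t)-1)    /* all-ones, matching the memset pattern */

struct kfd_signal_page {
        uint64_t slots[SLOTS_PER_PAGE]; /* stands in for the mmapped backing store */
        bool used[SLOTS_PER_PAGE];      /* stands in for used_slot_bitmap */
        unsigned int free_slots;
};

/* Lazy, single-page allocation, as in the patched allocate_event_notification_slot(). */
static struct kfd_signal_page *get_signal_page(struct kfd_signal_page **pagep)
{
        static struct kfd_signal_page storage;  /* kzalloc + __get_free_pages in the kernel */

        if (!*pagep) {
                memset(storage.slots, 0xff, sizeof(storage.slots)); /* all unsignaled */
                storage.free_slots = SLOTS_PER_PAGE;
                *pagep = &storage;
        }
        return *pagep;
}

/*
 * With a single page there is no page_index to fold in, so the slot index
 * is the event ID and make_signal_event_id() disappears.
 */
static bool allocate_free_slot(struct kfd_signal_page *page, unsigned int *event_id)
{
        unsigned int slot;

        if (!page || page->free_slots == 0)
                return false;

        for (slot = 0; slot < SLOTS_PER_PAGE; slot++)   /* find_first_zero_bit() */
                if (!page->used[slot])
                        break;

        page->used[slot] = true;                        /* __set_bit() */
        page->free_slots--;
        page->slots[slot] = UNSIGNALED_EVENT_SLOT;

        *event_id = slot;                               /* event ID == slot index */
        return true;
}

int main(void)
{
        struct kfd_signal_page *page = NULL;
        unsigned int id;

        page = get_signal_page(&page);
        while (allocate_free_slot(page, &id))
                ;       /* exhaust the page: at most SLOTS_PER_PAGE signal events */

        printf("allocated %u signal events, %u slots free\n",
               SLOTS_PER_PAGE - page->free_slots, page->free_slots);
        return 0;
}

With one fixed-size page, running out of slots is the only failure mode left in allocate_free_slot(), which is also what lets kfd_event_mmap() drop the page lookup and just use p->signal_page.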