[PATCH 11/16] drm/amdkfd: Simplify event ID and signal slot management

oded.gabbay@xxxxxxxxx (Oded Gabbay) · Wed, 25 Oct 2017 13:42:32 +0300



On Sat, Oct 21, 2017 at 3:23 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote:
> Signal slots are identical to event IDs.
>
> Replace the used_slot_bitmap and events hash table with an IDR to
> allocate and lookup event IDs and signal slots more efficiently.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_events.c | 230 ++++++++++----------------------
>  drivers/gpu/drm/amd/amdkfd/kfd_events.h |  14 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |   6 +-
>  3 files changed, 80 insertions(+), 170 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index f800e48..cddd4b9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -41,24 +41,16 @@ struct kfd_event_waiter {
>         bool activated;          /* Becomes true when event is signaled */
>  };
>
> -#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
> -#define SLOT_BITMAP_LONGS BITS_TO_LONGS(SLOTS_PER_PAGE)
> -
>  /*
> - * Over-complicated pooled allocator for event notification slots.
> - *
>   * Each signal event needs a 64-bit signal slot where the signaler will write
> - * a 1 before sending an interrupt.l (This is needed because some interrupts
> + * a 1 before sending an interrupt. (This is needed because some interrupts
>   * do not contain enough spare data bits to identify an event.)
> - * We get whole pages from vmalloc and map them to the process VA.
> - * Individual signal events are then allocated a slot in a page.
> + * We get whole pages and map them to the process VA.
> + * Individual signal events use their event_id as slot index.
>   */
> -
>  struct kfd_signal_page {
>         uint64_t *kernel_address;
>         uint64_t __user *user_address;
> -       unsigned int free_slots;
> -       unsigned long used_slot_bitmap[SLOT_BITMAP_LONGS];
>  };
>
>  /*
> @@ -73,34 +65,6 @@ static uint64_t *page_slots(struct kfd_signal_page *page)
>         return page->kernel_address;
>  }
>
> -static bool allocate_free_slot(struct kfd_process *process,
> -                              unsigned int *out_slot_index)
> -{
> -       struct kfd_signal_page *page = process->signal_page;
> -       unsigned int slot;
> -
> -       if (!page || page->free_slots == 0) {
> -               pr_debug("No free event signal slots were found for process %p\n",
> -                        process);
> -
> -               return false;
> -       }
> -
> -       slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE);
> -
> -       __set_bit(slot, page->used_slot_bitmap);
> -       page->free_slots--;
> -
> -       page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT;
> -
> -       *out_slot_index = slot;
> -
> -       pr_debug("Allocated event signal slot in page %p, slot %d\n",
> -                page, slot);
> -
> -       return true;
> -}
> -
>  static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
>  {
>         void *backing_store;
> @@ -110,8 +74,6 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
>         if (!page)
>                 return NULL;
>
> -       page->free_slots = SLOTS_PER_PAGE;
> -
>         backing_store = (void *) __get_free_pages(GFP_KERNEL,
>                                         get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
>         if (!backing_store)
> @@ -132,28 +94,26 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
>         return NULL;
>  }
>
> -static bool allocate_event_notification_slot(struct kfd_process *p,
> -                                            unsigned int *signal_slot_index)
> +static int allocate_event_notification_slot(struct kfd_process *p,
> +                                           struct kfd_event *ev)
>  {
> +       int id;
> +
>         if (!p->signal_page) {
>                 p->signal_page = allocate_signal_page(p);
>                 if (!p->signal_page)
> -                       return false;
> +                       return -ENOMEM;
>         }
>
> -       return allocate_free_slot(p, signal_slot_index);
> -}
> +       id = idr_alloc(&p->event_idr, ev, 0, KFD_SIGNAL_EVENT_LIMIT,
> +                      GFP_KERNEL);
> +       if (id < 0)
> +               return id;
>
> -/* Assumes that the process's event_mutex is locked. */
> -static void release_event_notification_slot(struct kfd_signal_page *page,
> -                                               size_t slot_index)
> -{
> -       __clear_bit(slot_index, page->used_slot_bitmap);
> -       page->free_slots++;
> +       ev->event_id = id;
> +       page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT;
>
> -       /* We don't free signal pages, they are retained by the process
> -        * and reused until it exits.
> -        */
> +       return 0;
>  }
>
>  /*
> @@ -162,89 +122,32 @@ static void release_event_notification_slot(struct kfd_signal_page *page,
>   */
>  static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
>  {
> -       struct kfd_event *ev;
> -
> -       hash_for_each_possible(p->events, ev, events, id)
> -               if (ev->event_id == id)
> -                       return ev;
> -
> -       return NULL;
> -}
> -
> -/*
> - * Produce a kfd event id for a nonsignal event.
> - * These are arbitrary numbers, so we do a sequential search through
> - * the hash table for an unused number.
> - */
> -static u32 make_nonsignal_event_id(struct kfd_process *p)
> -{
> -       u32 id;
> -
> -       for (id = p->next_nonsignal_event_id;
> -               id < KFD_LAST_NONSIGNAL_EVENT_ID &&
> -               lookup_event_by_id(p, id);
> -               id++)
> -               ;
> -
> -       if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
> -
> -               /*
> -                * What if id == LAST_NONSIGNAL_EVENT_ID - 1?
> -                * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so
> -                * the first loop fails immediately and we proceed with the
> -                * wraparound loop below.
> -                */
> -               p->next_nonsignal_event_id = id + 1;
> -
> -               return id;
> -       }
> -
> -       for (id = KFD_FIRST_NONSIGNAL_EVENT_ID;
> -               id < KFD_LAST_NONSIGNAL_EVENT_ID &&
> -               lookup_event_by_id(p, id);
> -               id++)
> -               ;
> -
> -
> -       if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
> -               p->next_nonsignal_event_id = id + 1;
> -               return id;
> -       }
> -
> -       p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
> -       return 0;
> -}
> -
> -static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p,
> -                                               unsigned int signal_slot)
> -{
> -       return lookup_event_by_id(p, signal_slot);
> +       return idr_find(&p->event_idr, id);
>  }
>
>  static int create_signal_event(struct file *devkfd,
>                                 struct kfd_process *p,
>                                 struct kfd_event *ev)
>  {
> +       int ret;
> +
>         if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) {
>                 if (!p->signal_event_limit_reached) {
>                         pr_warn("Signal event wasn't created because limit was reached\n");
>                         p->signal_event_limit_reached = true;
>                 }
> -               return -ENOMEM;
> +               return -ENOSPC;
>         }
>
> -       if (!allocate_event_notification_slot(p, &ev->signal_slot_index)) {
> +       ret = allocate_event_notification_slot(p, ev);
> +       if (ret) {
>                 pr_warn("Signal event wasn't created because out of kernel memory\n");
> -               return -ENOMEM;
> +               return ret;
>         }
>
>         p->signal_event_count++;
>
> -       ev->user_signal_address =
> -                       &p->signal_page->user_address[ev->signal_slot_index];
> -
> -       ev->event_id = ev->signal_slot_index;
> -
> +       ev->user_signal_address = &p->signal_page->user_address[ev->event_id];
>         pr_debug("Signal event number %zu created with id %d, address %p\n",
>                         p->signal_event_count, ev->event_id,
>                         ev->user_signal_address);
> @@ -252,16 +155,20 @@ static int create_signal_event(struct file *devkfd,
>         return 0;
>  }
>
> -/*
> - * No non-signal events are supported yet.
> - * We create them as events that never signal.
> - * Set event calls from user-mode are failed.
> - */
>  static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
>  {
> -       ev->event_id = make_nonsignal_event_id(p);
> -       if (ev->event_id == 0)
> -               return -ENOMEM;
> +       /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
> +        * intentional integer overflow to -1 without a compiler
> +        * warning. idr_alloc treats a negative value as "maximum
> +        * signed integer".
> +        */
> +       int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
> +                          (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
> +                          GFP_KERNEL);
> +
> +       if (id < 0)
> +               return id;
> +       ev->event_id = id;
>
>         return 0;
>  }
> @@ -269,9 +176,8 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
>  void kfd_event_init_process(struct kfd_process *p)
>  {
>         mutex_init(&p->event_mutex);
> -       hash_init(p->events);
> +       idr_init(&p->event_idr);
>         p->signal_page = NULL;
> -       p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
>         p->signal_event_count = 0;
>  }
>
> @@ -284,25 +190,22 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
>                 waiter->event = NULL;
>         wake_up_all(&ev->wq);
>
> -       if ((ev->type == KFD_EVENT_TYPE_SIGNAL ||
> -            ev->type == KFD_EVENT_TYPE_DEBUG) && p->signal_page) {
> -               release_event_notification_slot(p->signal_page,
> -                                               ev->signal_slot_index);
> +       if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
> +           ev->type == KFD_EVENT_TYPE_DEBUG)
>                 p->signal_event_count--;
> -       }
>
> -       hash_del(&ev->events);
> +       idr_remove(&p->event_idr, ev->event_id);
>         kfree(ev);
>  }
>
>  static void destroy_events(struct kfd_process *p)
>  {
>         struct kfd_event *ev;
> -       struct hlist_node *tmp;
> -       unsigned int hash_bkt;
> +       uint32_t id;
>
> -       hash_for_each_safe(p->events, hash_bkt, tmp, ev, events)
> +       idr_for_each_entry(&p->event_idr, ev, id)
>                 destroy_event(p, ev);
> +       idr_destroy(&p->event_idr);
>  }
>
>  /*
> @@ -365,7 +268,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
>                 if (!ret) {
>                         *event_page_offset = KFD_MMAP_EVENTS_MASK;
>                         *event_page_offset <<= PAGE_SHIFT;
> -                       *event_slot_index = ev->signal_slot_index;
> +                       *event_slot_index = ev->event_id;
>                 }
>                 break;
>         default:
> @@ -374,8 +277,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
>         }
>
>         if (!ret) {
> -               hash_add(p->events, &ev->events, ev->event_id);
> -
>                 *event_id = ev->event_id;
>                 *event_trigger_data = ev->event_id;
>         } else {
> @@ -465,17 +366,7 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
>
>  static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
>  {
> -       page_slots(p->signal_page)[ev->signal_slot_index] =
> -                                               UNSIGNALED_EVENT_SLOT;
> -}
> -
> -static bool is_slot_signaled(struct kfd_process *p, unsigned int index)
> -{
> -       if (!p->signal_page)
> -               return false;
> -       else
> -               return page_slots(p->signal_page)[index] !=
> -                       UNSIGNALED_EVENT_SLOT;
> +       page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT;
>  }
>
>  static void set_event_from_interrupt(struct kfd_process *p,
> @@ -514,13 +405,31 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
>                  * ignore it, but we could use any bits we did receive to
>                  * search faster.
>                  */
> -               unsigned int i;
> +               uint64_t *slots = page_slots(p->signal_page);
> +               uint32_t id;
> +
> +               if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) {
> +                       /* With relatively few events, it's faster to
> +                        * iterate over the event IDR
> +                        */
> +                       idr_for_each_entry(&p->event_idr, ev, id) {
> +                               if (id >= KFD_SIGNAL_EVENT_LIMIT)
> +                                       break;
>
> -               for (i = 0; i < SLOTS_PER_PAGE; i++)
> -                       if (is_slot_signaled(p, i)) {
> -                               ev = lookup_event_by_page_slot(p, i);
> -                               set_event_from_interrupt(p, ev);
> +                               if (slots[id] != UNSIGNALED_EVENT_SLOT)
> +                                       set_event_from_interrupt(p, ev);
>                         }
> +               } else {
> +                       /* With relatively many events, it's faster to
> +                        * iterate over the signal slots and lookup
> +                        * only signaled events from the IDR.
> +                        */
> +                       for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
> +                               if (slots[id] != UNSIGNALED_EVENT_SLOT) {
> +                                       ev = lookup_event_by_id(p, id);
> +                                       set_event_from_interrupt(p, ev);
> +                               }
> +               }
>         }
>
>         mutex_unlock(&p->event_mutex);
> @@ -832,12 +741,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
>  {
>         struct kfd_hsa_memory_exception_data *ev_data;
>         struct kfd_event *ev;
> -       int bkt;
> +       uint32_t id;
>         bool send_signal = true;
>
>         ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
>
> -       hash_for_each(p->events, bkt, ev, events)
> +       id = KFD_FIRST_NONSIGNAL_EVENT_ID;
> +       idr_for_each_entry_continue(&p->event_idr, ev, id)
>                 if (ev->type == type) {
>                         send_signal = false;
>                         dev_dbg(kfd_device,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
> index f85fcee..abca5bf 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
> @@ -31,9 +31,13 @@
>  #include "kfd_priv.h"
>  #include <uapi/linux/kfd_ioctl.h>
>
> -#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U
> -#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK
> -#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX
> +/*
> + * IDR supports non-negative integer IDs. Small IDs are used for
> + * signal events to match their signal slot. Use the upper half of the
> + * ID space for non-signal events.
> + */
> +#define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1)
> +#define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX
>
>  /*
>   * Written into kfd_signal_slot_t to indicate that the event is not signaled.
> @@ -47,9 +51,6 @@ struct kfd_event_waiter;
>  struct signal_page;
>
>  struct kfd_event {
> -       /* All events in process, rooted at kfd_process.events. */
> -       struct hlist_node events;
> -
>         u32 event_id;
>
>         bool signaled;
> @@ -60,7 +61,6 @@ struct kfd_event {
>         wait_queue_head_t wq; /* List of event waiters. */
>
>         /* Only for signal events. */
> -       unsigned int signal_slot_index;
>         uint64_t __user *user_signal_address;
>
>         /* type specific data */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index c1b3ee2..ebae8e1 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -31,6 +31,7 @@
>  #include <linux/workqueue.h>
>  #include <linux/spinlock.h>
>  #include <linux/kfd_ioctl.h>
> +#include <linux/idr.h>
>  #include <kgd_kfd_interface.h>
>
>  #include "amd_shared.h"
> @@ -538,11 +539,10 @@ struct kfd_process {
>
>         /* Event-related data */
>         struct mutex event_mutex;
> -       /* All events in process hashed by ID, linked on kfd_event.events. */
> -       DECLARE_HASHTABLE(events, 4);
> +       /* Event ID allocator and lookup */
> +       struct idr event_idr;
>         /* Event page */
>         struct kfd_signal_page *signal_page;
> -       u32 next_nonsignal_event_id;
>         size_t signal_event_count;
>         bool signal_event_limit_reached;
>  };
> --
> 2.7.4
>

This patch is:
Acked-by: Oded Gabbay <oded.gabbay at gmail.com>