This speeds up signal lookup when the IH ring entry includes a valid context ID or partial context ID. Only if the context ID is found to be invalid, fall back to an exhaustive search of all signaled events. Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> --- drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 7 ++- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 73 +++++++++++++++++++----- 2 files changed, 64 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 66164aa..3d5ccb3 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -47,6 +47,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, unsigned int pasid; const struct cik_ih_ring_entry *ihre = (const struct cik_ih_ring_entry *)ih_ring_entry; + uint32_t context_id = ihre->data & 0xfffffff; pasid = (ihre->ring_id & 0xffff0000) >> 16; @@ -54,11 +55,11 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, return; if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE) - kfd_signal_event_interrupt(pasid, 0, 0); + kfd_signal_event_interrupt(pasid, context_id, 28); else if (ihre->source_id == CIK_INTSRC_SDMA_TRAP) - kfd_signal_event_interrupt(pasid, 0, 0); + kfd_signal_event_interrupt(pasid, context_id, 28); else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG) - kfd_signal_event_interrupt(pasid, ihre->data & 0xFF, 8); + kfd_signal_event_interrupt(pasid, context_id & 0xff, 8); else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE) kfd_signal_hw_exception_event(pasid); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index cddd4b9..28fead5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -53,12 +53,6 @@ struct kfd_signal_page { uint64_t __user *user_address; }; -/* - * For signal events, the event ID is used as the interrupt user data. - * For SQ s_sendmsg interrupts, this is limited to 8 bits. - */ - -#define INTERRUPT_DATA_BITS 12 static uint64_t *page_slots(struct kfd_signal_page *page) { @@ -125,6 +119,54 @@ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) return idr_find(&p->event_idr, id); } +/** + * lookup_signaled_event_by_partial_id - Lookup signaled event from partial ID + * @p: Pointer to struct kfd_process + * @id: ID to look up + * @bits: Number of valid bits in @id + * + * Finds the first signaled event with a matching partial ID. If no + * matching signaled event is found, returns NULL. In that case the + * caller should assume that the partial ID is invalid and do an + * exhaustive search of all siglaned events. + * + * If multiple events with the same partial ID signal at the same + * time, they will be found one interrupt at a time, not necessarily + * in the same order the interrupts occurred. As long as the number of + * interrupts is correct, all signaled events will be seen by the + * driver. + */ +static struct kfd_event *lookup_signaled_event_by_partial_id( + struct kfd_process *p, uint32_t id, uint32_t bits) +{ + struct kfd_event *ev; + + if (!p->signal_page || id >= KFD_SIGNAL_EVENT_LIMIT) + return NULL; + + /* Fast path for the common case that @id is not a partial ID + * and we only need a single lookup. + */ + if (bits > 31 || (1U << bits) >= KFD_SIGNAL_EVENT_LIMIT) { + if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT) + return NULL; + + return idr_find(&p->event_idr, id); + } + + /* General case for partial IDs: Iterate over all matching IDs + * and find the first one that has signaled. + */ + for (ev = NULL; id < KFD_SIGNAL_EVENT_LIMIT && !ev; id += 1U << bits) { + if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT) + continue; + + ev = idr_find(&p->event_idr, id); + } + + return ev; +} + static int create_signal_event(struct file *devkfd, struct kfd_process *p, struct kfd_event *ev) @@ -381,7 +423,7 @@ static void set_event_from_interrupt(struct kfd_process *p, void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, uint32_t valid_id_bits) { - struct kfd_event *ev; + struct kfd_event *ev = NULL; /* * Because we are called from arbitrary context (workqueue) as opposed @@ -395,19 +437,24 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, mutex_lock(&p->event_mutex); - if (valid_id_bits >= INTERRUPT_DATA_BITS) { - /* Partial ID is a full ID. */ - ev = lookup_event_by_id(p, partial_id); + if (valid_id_bits) + ev = lookup_signaled_event_by_partial_id(p, partial_id, + valid_id_bits); + if (ev) { set_event_from_interrupt(p, ev); } else if (p->signal_page) { /* - * Partial ID is in fact partial. For now we completely - * ignore it, but we could use any bits we did receive to - * search faster. + * Partial ID lookup failed. Assume that the event ID + * in the interrupt payload was invalid and do an + * exhaustive search of signaled events. */ uint64_t *slots = page_slots(p->signal_page); uint32_t id; + if (valid_id_bits) + pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n", + partial_id, valid_id_bits); + if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) { /* With relatively few events, it's faster to * iterate over the event IDR -- 2.7.4