When an event with pending waiters is destroyed, those waiters may end
up sleeping forever unless they are notified and woken up. Implement
the notification by clearing the waiter->event pointer, which becomes
invalid anyway when the event is freed, and by waking up the waiting
tasks. Waiters on a destroyed event return failure.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_events.c | 72 +++++++++++++++++++++------------
 1 file changed, 46 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 5bb88b74..6050e88 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -345,18 +345,24 @@ void kfd_event_init_process(struct kfd_process *p)
 
 static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
 {
+	/* Wake up pending waiters. They will return failure */
+	while (!list_empty(&ev->waiters)) {
+		struct kfd_event_waiter *waiter =
+			list_first_entry(&ev->waiters, struct kfd_event_waiter,
+					waiters);
+
+		waiter->event = NULL;
+		/* _init because free_waiters will call list_del */
+		list_del_init(&waiter->waiters);
+		wake_up_process(waiter->sleeping_task);
+	}
+
 	if (ev->signal_page) {
 		release_event_notification_slot(ev->signal_page,
 						ev->signal_slot_index);
 		p->signal_event_count--;
 	}
 
-	/*
-	 * Abandon the list of waiters. Individual waiting threads will
-	 * clean up their own data.
-	 */
-	list_del(&ev->waiters);
-
 	hash_del(&ev->events);
 	kfree(ev);
 }
@@ -646,22 +652,36 @@ static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter)
 		list_add(&waiter->waiters, &ev->waiters);
 }
 
-static bool test_event_condition(bool all, uint32_t num_events,
+/* test_event_condition - Test condition of events being waited for
+ * @all:           Return completion only if all events have signaled
+ * @num_events:    Number of events to wait for
+ * @event_waiters: Array of event waiters, one per event
+ *
+ * Returns KFD_IOC_WAIT_RESULT_COMPLETE if all (or one) event(s) have
+ * signaled. Returns KFD_IOC_WAIT_RESULT_TIMEOUT if no (or not all)
+ * events have signaled. Returns KFD_IOC_WAIT_RESULT_FAIL if any of
+ * the events have been destroyed.
+ */
+static uint32_t test_event_condition(bool all, uint32_t num_events,
 				struct kfd_event_waiter *event_waiters)
 {
 	uint32_t i;
 	uint32_t activated_count = 0;
 
 	for (i = 0; i < num_events; i++) {
+		if (!event_waiters[i].event)
+			return KFD_IOC_WAIT_RESULT_FAIL;
+
 		if (event_waiters[i].activated) {
 			if (!all)
-				return true;
+				return KFD_IOC_WAIT_RESULT_COMPLETE;
 
 			activated_count++;
 		}
 	}
 
-	return activated_count == num_events;
+	return activated_count == num_events ?
+		KFD_IOC_WAIT_RESULT_COMPLETE : KFD_IOC_WAIT_RESULT_TIMEOUT;
 }
 
 /*
@@ -745,11 +765,6 @@ int kfd_wait_on_events(struct kfd_process *p,
 
 	mutex_lock(&p->event_mutex);
 
-	/* Set to something unreasonable - this is really
-	 * just a bool for now.
-	 */
-	*wait_result = KFD_IOC_WAIT_RESULT_TIMEOUT;
-
 	for (i = 0; i < num_events; i++) {
 		struct kfd_event_data event_data;
 
@@ -766,17 +781,22 @@ int kfd_wait_on_events(struct kfd_process *p,
 	}
 
 	/* Check condition once. */
-	if (test_event_condition(all, num_events, event_waiters)) {
-		*wait_result = KFD_IOC_WAIT_RESULT_COMPLETE;
+	*wait_result = test_event_condition(all, num_events, event_waiters);
+	if (*wait_result == KFD_IOC_WAIT_RESULT_COMPLETE) {
 		ret = copy_signaled_event_data(num_events,
 					       event_waiters, events);
 		goto out_unlock;
-	} else {
-		/* Add to wait lists if we need to wait. */
-		for (i = 0; i < num_events; i++)
-			init_event_waiter_add_to_waitlist(&event_waiters[i]);
+	} else if (WARN_ON(*wait_result == KFD_IOC_WAIT_RESULT_FAIL)) {
+		/* This should not happen. Events shouldn't be
+		 * destroyed while we're holding the event_mutex
+		 */
+		goto out_unlock;
 	}
 
+	/* Add to wait lists if we need to wait. */
+	for (i = 0; i < num_events; i++)
+		init_event_waiter_add_to_waitlist(&event_waiters[i]);
+
 	mutex_unlock(&p->event_mutex);
 
 	while (true) {
@@ -809,15 +829,13 @@ int kfd_wait_on_events(struct kfd_process *p,
 		 */
 		set_current_state(TASK_INTERRUPTIBLE);
 
-		if (test_event_condition(all, num_events, event_waiters)) {
-			*wait_result = KFD_IOC_WAIT_RESULT_COMPLETE;
+		*wait_result = test_event_condition(all, num_events,
+						event_waiters);
+		if (*wait_result != KFD_IOC_WAIT_RESULT_TIMEOUT)
 			break;
-		}
 
-		if (timeout <= 0) {
-			*wait_result = KFD_IOC_WAIT_RESULT_TIMEOUT;
+		if (timeout <= 0)
 			break;
-		}
 
 		timeout = schedule_timeout(timeout);
 	}
@@ -837,6 +855,8 @@ int kfd_wait_on_events(struct kfd_process *p,
 out:
 	if (ret)
 		*wait_result = KFD_IOC_WAIT_RESULT_FAIL;
+	else if (*wait_result == KFD_IOC_WAIT_RESULT_FAIL)
+		ret = -EIO;
 
 	return ret;
 }
-- 
2.7.4
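
For readers unfamiliar with the bare-task sleep pattern this fix depends
on, below is a minimal, self-contained sketch (not part of the patch; the
demo_* names are hypothetical, simplified analogues of the
kfd_wait_on_events()/destroy_event() pair). It illustrates why the waiter
re-checks its condition after set_current_state(TASK_INTERRUPTIBLE): a
wake_up_process() from the destroying side either interrupts
schedule_timeout() or arrives after the task has already marked itself
sleeping, so the wake-up cannot be lost, and the NULL-ed event pointer is
observed as failure.

/* Illustrative sketch only, not from kfd_events.c. */
#include <linux/errno.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/types.h>

struct demo_waiter {				/* hypothetical */
	struct task_struct *sleeping_task;	/* set to current by the waiter */
	void *event;				/* NULL: event was destroyed */
	bool activated;				/* set by the signaling path */
};

static DEFINE_MUTEX(demo_mutex);		/* plays the role of event_mutex */

/* Waiting side, analogous to the sleep loop in kfd_wait_on_events(). */
static int demo_wait(struct demo_waiter *w, long timeout)
{
	int ret;

	while (true) {
		/* Mark the task sleeping *before* checking the condition.
		 * set_current_state() implies a full memory barrier, so if
		 * the destroyer clears w->event and calls wake_up_process()
		 * after this point, schedule_timeout() returns immediately
		 * instead of sleeping; the wake-up cannot be lost between
		 * the check and the sleep.
		 */
		set_current_state(TASK_INTERRUPTIBLE);

		if (!w->event) {		/* destroyed under us */
			ret = -EIO;
			break;
		}
		if (w->activated) {		/* signaled */
			ret = 0;
			break;
		}
		if (timeout <= 0) {
			ret = -ETIME;
			break;
		}

		timeout = schedule_timeout(timeout);
	}
	__set_current_state(TASK_RUNNING);
	return ret;
}

/* Destroying side, analogous to destroy_event(): invalidate the pointer
 * first, then wake the sleeper so it observes the failure.
 */
static void demo_destroy(struct demo_waiter *w)
{
	mutex_lock(&demo_mutex);
	w->event = NULL;
	wake_up_process(w->sleeping_task);
	mutex_unlock(&demo_mutex);
}

Note that the ordering on the destroy side mirrors the patch: the event
pointer is cleared (and, in the patch, the waiter unlinked) under the
mutex before wake_up_process(), so the woken task's
test_event_condition() reliably sees the NULL pointer and reports
KFD_IOC_WAIT_RESULT_FAIL.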