Re: [PATCH 1/1] drm/amdkfd: Improve concurrency of event handling

On 2022-03-03 02:25, Christian König wrote:
On 2022-03-02 21:06, Felix Kuehling wrote:
Use rcu_read_lock to read p->event_idr concurrently with other readers
and writers. Use p->event_mutex only for creating and destroying events
and in kfd_wait_on_events.

That might not necessarily work as you expect. rcu_read_lock() does not provide barriers for the CPU cache.

Protect the contents of the kfd_event structure with a per-event
spinlock that can be taken inside the rcu_read_lock critical section.

That helps to prevent problems for the ev structure, but it doesn't protect the idr itself.

idr_find can be used safely under rcu_read_lock according to this comment in idr.h:

 * idr_find() is able to be called locklessly, using RCU. The caller must
 * ensure calls to this function are made within rcu_read_lock() regions.
 * Other readers (lock-free or otherwise) and modifications may be running
 * concurrently.
 *
 * It is still required that the caller manage the synchronization and
 * lifetimes of the items. So if RCU lock-free lookups are used, typically
 * this would mean that the items have their own locks, or are amenable to
 * lock-free access; and that the items are freed by RCU (or only freed after
 * having been deleted from the idr tree *and* a synchronize_rcu() grace
 * period).

It was introduced by f9c46d6ea5ce ("idr: make idr_find rcu-safe") in 2008.
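
To make the resulting usage pattern explicit, here is a minimal sketch of the idiom the patch relies on (hypothetical "item" type and helper names, not the actual kfd_event code): lookups run under rcu_read_lock() with a per-item spinlock protecting the contents, and teardown removes the entry from the IDR and waits out a grace period before freeing it.

/* Minimal sketch of the RCU-safe idr_find idiom (hypothetical "item"
 * type and helpers, not the actual kfd_event code).
 */
#include <linux/idr.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct item {
	spinlock_t lock;	/* protects the item's contents */
	bool signaled;
};

static DEFINE_IDR(item_idr);

/* Reader: lockless IDR lookup under RCU, per-item lock for the payload. */
static void signal_item(int id)
{
	struct item *it;

	rcu_read_lock();
	it = idr_find(&item_idr, id);
	if (it) {
		spin_lock(&it->lock);
		it->signaled = true;
		spin_unlock(&it->lock);
	}
	rcu_read_unlock();
}

/* Writer: remove from the IDR, then wait for concurrent RCU readers
 * to finish before freeing the item.
 */
static void destroy_item(int id)
{
	struct item *it = idr_remove(&item_idr, id);

	if (it) {
		synchronize_rcu();
		kfree(it);
	}
}

This matches the requirements quoted above: the items carry their own lock, and they are only freed after being removed from the IDR plus a synchronize_rcu() grace period.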



BTW: Why are you using an idr in the first place? I don't see the lookup used anywhere.

lookup_event_by_id is just a wrapper around idr_find. It's used in kfd_event_destroy, kfd_set_event, kfd_reset_event and kfd_wait_on_events. lookup_signaled_event_by_partial_id also uses idr_find. It's used in kfd_signal_event_interrupt.
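
For reference, the existing helper is essentially just the following (simplified), so the idr.h guarantees quoted above apply to it directly:

static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
{
	/* Plain idr_find; safe under rcu_read_lock() per the idr.h comment. */
	return idr_find(&p->event_idr, id);
}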

Regards,
  Felix



Regards,
Christian.


This eliminates contention on p->event_mutex in set_event, which tends
to be on the critical path for dispatch latency even when busy waiting
is used. It also eliminates lock contention in event interrupt handlers.
Since the p->event_mutex is now used much less, the impact of requiring
it in kfd_wait_on_events should also be much smaller.

This should improve event handling latency for processes using multiple
GPUs concurrently.

Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>
---
  drivers/gpu/drm/amd/amdkfd/kfd_events.c | 119 +++++++++++++++---------
  drivers/gpu/drm/amd/amdkfd/kfd_events.h |   1 +
  2 files changed, 78 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index deecccebe5b6..1726306715b2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -128,8 +128,8 @@ static int allocate_event_notification_slot(struct kfd_process *p,
  }
    /*
- * Assumes that p->event_mutex is held and of course that p is not going
- * away (current or locked).
+ * Assumes that p->event_mutex or rcu_read_lock is held and of course that p is
+ * not going away.
   */
  static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
  {
@@ -251,15 +251,18 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
      struct kfd_event_waiter *waiter;
        /* Wake up pending waiters. They will return failure */
+    spin_lock(&ev->lock);
      list_for_each_entry(waiter, &ev->wq.head, wait.entry)
-        waiter->event = NULL;
+        WRITE_ONCE(waiter->event, NULL);
      wake_up_all(&ev->wq);
+    spin_unlock(&ev->lock);
        if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
          ev->type == KFD_EVENT_TYPE_DEBUG)
          p->signal_event_count--;
        idr_remove(&p->event_idr, ev->event_id);
+    synchronize_rcu();
      kfree(ev);
  }
 
@@ -392,6 +395,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
      ev->auto_reset = auto_reset;
      ev->signaled = false;
  +    spin_lock_init(&ev->lock);
      init_waitqueue_head(&ev->wq);
        *event_page_offset = 0;
@@ -466,6 +470,7 @@ int kfd_criu_restore_event(struct file *devkfd,
      ev->auto_reset = ev_priv->auto_reset;
      ev->signaled = ev_priv->signaled;
  +    spin_lock_init(&ev->lock);
      init_waitqueue_head(&ev->wq);
        mutex_lock(&p->event_mutex);
@@ -609,13 +614,13 @@ static void set_event(struct kfd_event *ev)
        /* Auto reset if the list is non-empty and we're waking
       * someone. waitqueue_active is safe here because we're
-     * protected by the p->event_mutex, which is also held when
+     * protected by the ev->lock, which is also held when
       * updating the wait queues in kfd_wait_on_events.
       */
      ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq);
        list_for_each_entry(waiter, &ev->wq.head, wait.entry)
-        waiter->activated = true;
+        WRITE_ONCE(waiter->activated, true);
        wake_up_all(&ev->wq);
  }
 
@@ -626,16 +631,18 @@ int kfd_set_event(struct kfd_process *p, uint32_t event_id)
      int ret = 0;
      struct kfd_event *ev;
  -    mutex_lock(&p->event_mutex);
+    rcu_read_lock();
        ev = lookup_event_by_id(p, event_id);
+    spin_lock(&ev->lock);
        if (ev && event_can_be_cpu_signaled(ev))
          set_event(ev);
      else
          ret = -EINVAL;
  -    mutex_unlock(&p->event_mutex);
+    spin_unlock(&ev->lock);
+    rcu_read_unlock();
      return ret;
  }
 
@@ -650,23 +657,25 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
      int ret = 0;
      struct kfd_event *ev;
  -    mutex_lock(&p->event_mutex);
+    rcu_read_lock();
        ev = lookup_event_by_id(p, event_id);
+    spin_lock(&ev->lock);
        if (ev && event_can_be_cpu_signaled(ev))
          reset_event(ev);
      else
          ret = -EINVAL;
  -    mutex_unlock(&p->event_mutex);
+    spin_unlock(&ev->lock);
+    rcu_read_unlock();
      return ret;
    }
    static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
  {
-    page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT;
+    WRITE_ONCE(page_slots(p->signal_page)[ev->event_id], UNSIGNALED_EVENT_SLOT);
  }
    static void set_event_from_interrupt(struct kfd_process *p,
@@ -674,7 +683,9 @@ static void set_event_from_interrupt(struct kfd_process *p,
  {
      if (ev && event_can_be_gpu_signaled(ev)) {
          acknowledge_signal(p, ev);
+        spin_lock(&ev->lock);
          set_event(ev);
+        spin_unlock(&ev->lock);
      }
  }
 
@@ -693,7 +704,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
      if (!p)
          return; /* Presumably process exited. */
  -    mutex_lock(&p->event_mutex);
+    rcu_read_lock();
        if (valid_id_bits)
          ev = lookup_signaled_event_by_partial_id(p, partial_id,
@@ -721,7 +732,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
                  if (id >= KFD_SIGNAL_EVENT_LIMIT)
                      break;
  -                if (slots[id] != UNSIGNALED_EVENT_SLOT)
+                if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT)
                      set_event_from_interrupt(p, ev);
              }
          } else {
@@ -730,14 +741,14 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
               * only signaled events from the IDR.
               */
              for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
-                if (slots[id] != UNSIGNALED_EVENT_SLOT) {
+                if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT) {
                      ev = lookup_event_by_id(p, id);
                      set_event_from_interrupt(p, ev);
                  }
          }
      }
  -    mutex_unlock(&p->event_mutex);
+    rcu_read_unlock();
      kfd_unref_process(p);
  }
 
@@ -767,9 +778,11 @@ static int init_event_waiter_get_status(struct kfd_process *p,
      if (!ev)
          return -EINVAL;
  +    spin_lock(&ev->lock);
      waiter->event = ev;
      waiter->activated = ev->signaled;
      ev->signaled = ev->signaled && !ev->auto_reset;
+    spin_unlock(&ev->lock);
        return 0;
  }
@@ -781,8 +794,11 @@ static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter)
      /* Only add to the wait list if we actually need to
       * wait on this event.
       */
-    if (!waiter->activated)
+    if (!waiter->activated) {
+        spin_lock(&ev->lock);
          add_wait_queue(&ev->wq, &waiter->wait);
+        spin_unlock(&ev->lock);
+    }
  }
    /* test_event_condition - Test condition of events being waited for
@@ -802,10 +818,10 @@ static uint32_t test_event_condition(bool all, uint32_t num_events,
      uint32_t activated_count = 0;
        for (i = 0; i < num_events; i++) {
-        if (!event_waiters[i].event)
+        if (!READ_ONCE(event_waiters[i].event))
              return KFD_IOC_WAIT_RESULT_FAIL;
  -        if (event_waiters[i].activated) {
+        if (READ_ONCE(event_waiters[i].activated)) {
              if (!all)
                  return KFD_IOC_WAIT_RESULT_COMPLETE;
 
@@ -834,6 +850,8 @@ static int copy_signaled_event_data(uint32_t num_events,
      for (i = 0; i < num_events; i++) {
          waiter = &event_waiters[i];
          event = waiter->event;
+        if (!event)
+            return -EINVAL; /* event was destroyed */
          if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) {
              dst = &data[i].memory_exception_data;
              src = &event->memory_exception_data;
@@ -844,11 +862,8 @@ static int copy_signaled_event_data(uint32_t num_events,
      }
        return 0;
-
  }
  -
-
  static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
  {
      if (user_timeout_ms == KFD_EVENT_TIMEOUT_IMMEDIATE)
@@ -872,9 +887,12 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
      uint32_t i;
        for (i = 0; i < num_events; i++)
-        if (waiters[i].event)
+        if (waiters[i].event) {
+            spin_lock(&waiters[i].event->lock);
              remove_wait_queue(&waiters[i].event->wq,
                        &waiters[i].wait);
+            spin_unlock(&waiters[i].event->lock);
+        }
        kfree(waiters);
  }
@@ -898,6 +916,9 @@ int kfd_wait_on_events(struct kfd_process *p,
          goto out;
      }
  +    /* Use p->event_mutex here to protect against concurrent creation and
+     * destruction of events while we initialize event_waiters.
+     */
      mutex_lock(&p->event_mutex);
        for (i = 0; i < num_events; i++) {
@@ -976,14 +997,19 @@ int kfd_wait_on_events(struct kfd_process *p,
      }
      __set_current_state(TASK_RUNNING);
  +    mutex_lock(&p->event_mutex);
      /* copy_signaled_event_data may sleep. So this has to happen
       * after the task state is set back to RUNNING.
+     *
+     * The event may also have been destroyed after signaling. So
+     * copy_signaled_event_data also must confirm that the event
+     * still exists. Therefore this must be under the p->event_mutex
+     * which is also held when events are destroyed.
       */
      if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE)
          ret = copy_signaled_event_data(num_events,
                             event_waiters, events);
  -    mutex_lock(&p->event_mutex);
  out_unlock:
      free_waiters(num_events, event_waiters);
      mutex_unlock(&p->event_mutex);
@@ -1042,8 +1068,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
  }
    /*
- * Assumes that p->event_mutex is held and of course
- * that p is not going away (current or locked).
+ * Assumes that p is not going away.
   */
  static void lookup_events_by_type_and_signal(struct kfd_process *p,
          int type, void *event_data)
@@ -1055,6 +1080,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
        ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
  +    rcu_read_lock();
+
      id = KFD_FIRST_NONSIGNAL_EVENT_ID;
      idr_for_each_entry_continue(&p->event_idr, ev, id)
          if (ev->type == type) {
@@ -1062,9 +1089,11 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
              dev_dbg(kfd_device,
                      "Event found: id %X type %d",
                      ev->event_id, ev->type);
+            spin_lock(&ev->lock);
              set_event(ev);
              if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data)
                  ev->memory_exception_data = *ev_data;
+            spin_unlock(&ev->lock);
          }
        if (type == KFD_EVENT_TYPE_MEMORY) {
@@ -1087,6 +1116,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
                  p->lead_thread->pid, p->pasid);
          }
      }
+
+    rcu_read_unlock();
  }
    #ifdef KFD_SUPPORT_IOMMU_V2
@@ -1162,16 +1193,10 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid,
        if (KFD_GC_VERSION(dev) != IP_VERSION(9, 1, 0) &&
          KFD_GC_VERSION(dev) != IP_VERSION(9, 2, 2) &&
-        KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0)) {
-        mutex_lock(&p->event_mutex);
-
-        /* Lookup events by type and signal them */
+        KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0))
          lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY,
                  &memory_exception_data);
  -        mutex_unlock(&p->event_mutex);
-    }
-
      kfd_unref_process(p);
  }
  #endif /* KFD_SUPPORT_IOMMU_V2 */
@@ -1188,12 +1213,7 @@ void kfd_signal_hw_exception_event(u32 pasid)
      if (!p)
          return; /* Presumably process exited. */
  -    mutex_lock(&p->event_mutex);
-
-    /* Lookup events by type and signal them */
      lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
-
-    mutex_unlock(&p->event_mutex);
      kfd_unref_process(p);
  }
 
@@ -1229,16 +1249,19 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
              info->prot_write ? 1 : 0;
          memory_exception_data.failure.imprecise = 0;
      }
-    mutex_lock(&p->event_mutex);
+
+    rcu_read_lock();
        id = KFD_FIRST_NONSIGNAL_EVENT_ID;
      idr_for_each_entry_continue(&p->event_idr, ev, id)
          if (ev->type == KFD_EVENT_TYPE_MEMORY) {
+            spin_lock(&ev->lock);
              ev->memory_exception_data = memory_exception_data;
              set_event(ev);
+            spin_unlock(&ev->lock);
          }
  -    mutex_unlock(&p->event_mutex);
+    rcu_read_unlock();
      kfd_unref_process(p);
  }
 
@@ -1272,22 +1295,28 @@ void kfd_signal_reset_event(struct kfd_dev *dev)
              continue;
          }
  -        mutex_lock(&p->event_mutex);
+        rcu_read_lock();
+
          id = KFD_FIRST_NONSIGNAL_EVENT_ID;
          idr_for_each_entry_continue(&p->event_idr, ev, id) {
              if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+                spin_lock(&ev->lock);
                  ev->hw_exception_data = hw_exception_data;
                  ev->hw_exception_data.gpu_id = user_gpu_id;
                  set_event(ev);
+                spin_unlock(&ev->lock);
              }
              if (ev->type == KFD_EVENT_TYPE_MEMORY &&
                  reset_cause == KFD_HW_EXCEPTION_ECC) {
+                spin_lock(&ev->lock);
                  ev->memory_exception_data = memory_exception_data;
                  ev->memory_exception_data.gpu_id = user_gpu_id;
                  set_event(ev);
+                spin_unlock(&ev->lock);
              }
          }
-        mutex_unlock(&p->event_mutex);
+
+        rcu_read_unlock();
      }
      srcu_read_unlock(&kfd_processes_srcu, idx);
  }
@@ -1320,19 +1349,25 @@ void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid)
      memory_exception_data.gpu_id = user_gpu_id;
      memory_exception_data.failure.imprecise = true;
  -    mutex_lock(&p->event_mutex);
+    rcu_read_lock();
+
      idr_for_each_entry_continue(&p->event_idr, ev, id) {
          if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+            spin_lock(&ev->lock);
              ev->hw_exception_data = hw_exception_data;
              set_event(ev);
+            spin_unlock(&ev->lock);
          }
            if (ev->type == KFD_EVENT_TYPE_MEMORY) {
+            spin_lock(&ev->lock);
              ev->memory_exception_data = memory_exception_data;
              set_event(ev);
+            spin_unlock(&ev->lock);
          }
      }
-    mutex_unlock(&p->event_mutex);
+
+    rcu_read_unlock();
        /* user application will handle SIGBUS signal */
      send_sig(SIGBUS, p->lead_thread, 0);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
index 1238af11916e..55d376f56021 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
@@ -59,6 +59,7 @@ struct kfd_event {
        int type;
  +    spinlock_t lock;
      wait_queue_head_t wq; /* List of event waiters. */
        /* Only for signal events. */



