On 2018-09-14 03:51 AM, Christian König wrote:
> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>> Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
>>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
>>> DRM_AMDGPU_USERPTR Kconfig.
>>>
>>> It supports both KFD userptr and gfx userptr paths.
>>>
>>> This depends on several HMM patchset from Jérôme Glisse queued for
>>> upstream.
>>>
>>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>>> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
>>>  drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 121 ++++++++++++++-------------
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
>>>  4 files changed, 56 insertions(+), 75 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> index 9221e54..960a633 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>>>  config DRM_AMDGPU_USERPTR
>>>      bool "Always enable userptr write support"
>>>      depends on DRM_AMDGPU
>>> -   select MMU_NOTIFIER
>>> +   select HMM_MIRROR
>>>      help
>>> -     This option selects CONFIG_MMU_NOTIFIER if it isn't already
>>> -     selected to enabled full userptr support.
>>> +     This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
>>> +     isn't already selected to enabled full userptr support.
>>>
>>>  config DRM_AMDGPU_GART_DEBUGFS
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> index 138cb78..c1e5d43 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> @@ -171,7 +171,7 @@ endif
>>>  amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>>>  amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>>>  amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>>> +amdgpu-$(CONFIG_HMM) += amdgpu_mn.o
>>>
>>>  include $(FULL_AMD_PATH)/powerplay/Makefile
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> index e55508b..ad52f34 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> @@ -45,7 +45,7 @@
>>>  #include <linux/firmware.h>
>>>  #include <linux/module.h>
>>> -#include <linux/mmu_notifier.h>
>>> +#include <linux/hmm.h>
>>>  #include <linux/interval_tree.h>
>>>  #include <drm/drmP.h>
>>>  #include <drm/drm.h>
>>> @@ -66,6 +66,7 @@
>> Need to remove @mn documentation.
>>
>>>   * @objects: interval tree containing amdgpu_mn_nodes
>>>   * @read_lock: mutex for recursive locking of @lock
>>>   * @recursion: depth of recursion
>>> + * @mirror: HMM mirror function support
>>>   *
>>>   * Data for each amdgpu device and process address space.
>>>   */
>>> @@ -73,7 +74,6 @@ struct amdgpu_mn {
>>>      /* constant after initialisation */
>>>      struct amdgpu_device   *adev;
>>>      struct mm_struct   *mm;
>>> -   struct mmu_notifier   mn;
>>>      enum amdgpu_mn_type   type;
>>>
>>>      /* only used on destruction */
>>> @@ -87,6 +87,9 @@ struct amdgpu_mn {
>>>      struct rb_root_cached   objects;
>>>      struct mutex       read_lock;
>>>      atomic_t       recursion;
>>> +
>>> +   /* HMM mirror */
>>> +   struct hmm_mirror   mirror;
>>>  };
>>>
>>>  /**
>>> @@ -103,7 +106,7 @@ struct amdgpu_mn_node {
>>>  };
>>>
>>>  /**
>>> - * amdgpu_mn_destroy - destroy the MMU notifier
>>> + * amdgpu_mn_destroy - destroy the HMM mirror
>>>   *
>>>   * @work: previously sheduled work item
>>>   *
>>> @@ -129,28 +132,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
>>>      }
>>>      up_write(&amn->lock);
>>>      mutex_unlock(&adev->mn_lock);
>>> -   mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>>> +   hmm_mirror_unregister(&amn->mirror);
>>> +
>>>      kfree(amn);
>>>  }
>>>
>>>  /**
>>>   * amdgpu_mn_release - callback to notify about mm destruction
>> Update the function name in the comment.
>>
>>>   *
>>> - * @mn: our notifier
>>> - * @mm: the mm this callback is about
>>> + * @mirror: the HMM mirror (mm) this callback is about
>>>   *
>>> - * Shedule a work item to lazy destroy our notifier.
>>> + * Shedule a work item to lazy destroy HMM mirror.
>>>   */
>>> -static void amdgpu_mn_release(struct mmu_notifier *mn,
>>> -                 struct mm_struct *mm)
>>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>>>  {
>>> -   struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> +   struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
>>>
>>>      INIT_WORK(&amn->work, amdgpu_mn_destroy);
>>>      schedule_work(&amn->work);
>>>  }
>>> -
>>>  /**
>>>   * amdgpu_mn_lock - take the write side lock for this notifier
>>>   *
>>> @@ -237,21 +238,19 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
>>>  /**
>>>   * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
>>>   *
>>> - * @mn: our notifier
>>> - * @mm: the mm this callback is about
>>> - * @start: start of updated range
>>> - * @end: end of updated range
>>> + * @mirror: the hmm_mirror (mm) is about to update
>>> + * @update: the update start, end address
>>>   *
>>>   * Block for operations on BOs to finish and mark pages as accessed and
>>>   * potentially dirty.
>>>   */
>>> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>>> -                        struct mm_struct *mm,
>>> -                        unsigned long start,
>>> -                        unsigned long end,
>>> -                        bool blockable)
>>> +static int amdgpu_mn_invalidate_range_start_gfx(struct hmm_mirror *mirror,
>>> +           const struct hmm_update *update)
>>>  {
>>> -   struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> +   struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
>>> +   unsigned long start = update->start;
>>> +   unsigned long end = update->end;
>>> +   bool blockable = update->blockable;
>>>      struct interval_tree_node *it;
>>>
>>>      /* notification is exclusive, but interval is inclusive */
>>> @@ -278,28 +277,28 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>>>          amdgpu_mn_invalidate_node(node, start, end);
>>>      }
>>>
>>> +   amdgpu_mn_read_unlock(amn);
>>> +
>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>> overlapping or nested invalidation ranges. But if you're locking and
>> unlocking in the same function, is that still a concern?
> I don't understand the possible recursive case, but
> amdgpu_mn_read_lock() still supports recursive locking.
> Well the real problem is that unlocking them here won't work.
>
> We need to hold the lock until we are sure that the operation which
> updates the page tables is completed.
>
The reason for this change is that the HMM mirror has an invalidate_start
callback but no invalidate_end callback.

Checking mmu_notifier.c and hmm.c again, below is the entire logic to
update the CPU page tables and call back:

Old:
  1. down_read_non_owner(&amn->lock)
  2. loop to handle BOs from node->bos through interval tree amn->objects nodes
      gfx: wait for pending BO fence operations to finish, mark user pages dirty
      kfd: evict user queues of the process, wait for queue unmap/map operations to finish
  3. update CPU page tables
  4. up_read(&amn->lock)

New, with steps 3 and 4 swapped:
  1. down_read_non_owner(&amn->lock)
  2. loop to handle BOs from node->bos through interval tree amn->objects nodes (same as above)
  3. up_read(&amn->lock)
  4. update CPU page tables

amn->lock is used to protect interval tree access because the user may
submit/register a new userptr at any time; this is the same for the old
and the new way. Step 2 guarantees that the GPU operation is done before
the CPU page tables are updated, so I think the change is safe: we don't
need to hold the mn lock until the CPU page table update is completed.
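To make the new ordering concrete, here is a minimal sketch of the
callback under the new scheme (illustrative only: the function name is
made up and the interval tree walk is elided; the real code is in the
patch above):

static int sketch_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
                                             const struct hmm_update *update)
{
        struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);

        /* step 1: take the read side of amn->lock so no new userptr
         * can be registered while we run
         */
        if (amdgpu_mn_read_lock(amn, update->blockable))
                return -EAGAIN;

        /* step 2: walk amn->objects for BOs overlapping
         * [update->start, update->end] and wait until the GPU is done
         * with them (gfx: wait on fences; kfd: evict user queues)
         */

        /* step 3: drop the lock; the GPU can no longer touch the pages */
        amdgpu_mn_read_unlock(amn);

        /* step 4 happens in the HMM core only after we return: the CPU
         * page tables are updated once this callback has completed
         */
        return 0;
}

The old notifier needed the invalidate_range_end callback to drop the
lock because the CPU page table update happens between range_start and
range_end; with HMM there is only the one callback, so the unlock moves
inside it.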
Regards,
Philip

> Christian.
>
>>
>>>      return 0;
>>>  }
>>>
>>>  /**
>>>   * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
>>>   *
>>> - * @mn: our notifier
>>> - * @mm: the mm this callback is about
>>> - * @start: start of updated range
>>> - * @end: end of updated range
>>> + * @mirror: the hmm_mirror (mm) is about to update
>>> + * @update: the update start, end address
>>>   *
>>>   * We temporarily evict all BOs between start and end. This
>>>   * necessitates evicting all user-mode queues of the process. The BOs
>>>   * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>>>   */
>>> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>>> -                        struct mm_struct *mm,
>>> -                        unsigned long start,
>>> -                        unsigned long end,
>>> -                        bool blockable)
>>> +static int amdgpu_mn_invalidate_range_start_hsa(struct hmm_mirror *mirror,
>>> +           const struct hmm_update *update)
>>>  {
>>> -   struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> +   struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
>>> +   unsigned long start = update->start;
>>> +   unsigned long end = update->end;
>>> +   bool blockable = update->blockable;
>>>      struct interval_tree_node *it;
>>>
>>>      /* notification is exclusive, but interval is inclusive */
>>> @@ -326,59 +325,41 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>>>
>>>              if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>>>                              start, end))
>>> -               amdgpu_amdkfd_evict_userptr(mem, mm);
>>> +               amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>>>          }
>>>      }
>>>
>>> +   amdgpu_mn_read_unlock(amn);
>>> +
>>>      return 0;
>>>  }
>>>
>>> -/**
>>> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
>>> - *
>>> - * @mn: our notifier
>>> - * @mm: the mm this callback is about
>>> - * @start: start of updated range
>>> - * @end: end of updated range
>>> - *
>>> - * Release the lock again to allow new command submissions.
>>> +/* Low bits of any reasonable mm pointer will be unused due to struct
>>> + * alignment. Use these bits to make a unique key from the mm pointer
>>> + * and notifier type.
>>>   */
>>> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>>> -                      struct mm_struct *mm,
>>> -                      unsigned long start,
>>> -                      unsigned long end)
>>> -{
>>> -   struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> -
>>> -   amdgpu_mn_read_unlock(amn);
>>> -}
>>> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>>
>>> -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>>>      [AMDGPU_MN_TYPE_GFX] = {
>>> -       .release = amdgpu_mn_release,
>>> -       .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
>>> -       .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>> +       .sync_cpu_device_pagetables =
>>> +               amdgpu_mn_invalidate_range_start_gfx,
>>> +       .release = amdgpu_hmm_mirror_release
>>>      },
>>>      [AMDGPU_MN_TYPE_HSA] = {
>>> -       .release = amdgpu_mn_release,
>>> -       .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
>>> -       .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>> +       .sync_cpu_device_pagetables =
>>> +               amdgpu_mn_invalidate_range_start_hsa,
>>> +       .release = amdgpu_hmm_mirror_release
>>>      },
>>>  };
>>>
>>> -/* Low bits of any reasonable mm pointer will be unused due to struct
>>> - * alignment. Use these bits to make a unique key from the mm pointer
>>> - * and notifier type.
>>> - */
>>> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>> -
>>>  /**
>>> - * amdgpu_mn_get - create notifier context
>>> + * amdgpu_mn_get - create HMM mirror context
>>>   *
>>>   * @adev: amdgpu device pointer
>>>   * @type: type of MMU notifier context
>>>   *
>>> - * Creates a notifier context for current->mm.
>>> + * Creates a HMM mirror context for current->mm.
>>>   */
>>>  struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>                  enum amdgpu_mn_type type)
>>> @@ -408,12 +389,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>      amn->mm = mm;
>>>      init_rwsem(&amn->lock);
>>>      amn->type = type;
>>> -   amn->mn.ops = &amdgpu_mn_ops[type];
>>>      amn->objects = RB_ROOT_CACHED;
>>>      mutex_init(&amn->read_lock);
>>>      atomic_set(&amn->recursion, 0);
>>>
>>> -   r = __mmu_notifier_register(&amn->mn, mm);
>>> +   amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
>>> +   r = hmm_mirror_register(&amn->mirror, mm);
>>>      if (r)
>>>          goto free_amn;
>>>
>>> @@ -439,7 +420,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>   * @bo: amdgpu buffer object
>>>   * @addr: userptr addr we should monitor
>>>   *
>>> - * Registers an MMU notifier for the given BO at the specified address.
>>> + * Registers an HMM mirror for the given BO at the specified address.
>>>   * Returns 0 on success, -ERRNO if anything goes wrong.
>>>   */
>>>  int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>> @@ -495,11 +476,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>>  }
>>>
>>>  /**
>>> - * amdgpu_mn_unregister - unregister a BO for notifier updates
>>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>>>   *
>>>   * @bo: amdgpu buffer object
>>>   *
>>> - * Remove any registration of MMU notifier updates from the buffer object.
>>> + * Remove any registration of HMM mirror updates from the buffer object.
>>>   */
>>>  void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>>  {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> index eb0f432..0e27526 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>>>      AMDGPU_MN_TYPE_HSA,
>>>  };
>>>
>>> -#if defined(CONFIG_MMU_NOTIFIER)
>>> +#if defined(CONFIG_HMM)
>>>  void amdgpu_mn_lock(struct amdgpu_mn *mn);
>>>  void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>>>  struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>