On 2018-09-14 03:51 AM, Christian König wrote:
> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>> Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
>>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
>>> DRM_AMDGPU_USERPTR Kconfig.
>>>
>>> It supports both KFD userptr and gfx userptr paths.
>>>
>>> This depends on several HMM patchset from Jérôme Glisse queued for
>>> upstream.
>>>
>>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>>> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
>>>  drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 121 ++++++++++++++-------------
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
>>>  4 files changed, 56 insertions(+), 75 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> index 9221e54..960a633 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>>>  config DRM_AMDGPU_USERPTR
>>>      bool "Always enable userptr write support"
>>>      depends on DRM_AMDGPU
>>> -   select MMU_NOTIFIER
>>> +   select HMM_MIRROR
>>>      help
>>> -     This option selects CONFIG_MMU_NOTIFIER if it isn't already
>>> -     selected to enabled full userptr support.
>>> +     This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
>>> +     isn't already selected to enabled full userptr support.
>>>
>>>  config DRM_AMDGPU_GART_DEBUGFS
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> index 138cb78..c1e5d43 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> @@ -171,7 +171,7 @@ endif
>>>  amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>>>  amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>>>  amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>>> +amdgpu-$(CONFIG_HMM) += amdgpu_mn.o
>>>
>>>  include $(FULL_AMD_PATH)/powerplay/Makefile
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> index e55508b..ad52f34 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> @@ -45,7 +45,7 @@
>>>  #include <linux/firmware.h>
>>>  #include <linux/module.h>
>>> -#include <linux/mmu_notifier.h>
>>> +#include <linux/hmm.h>
>>>  #include <linux/interval_tree.h>
>>>  #include <drm/drmP.h>
>>>  #include <drm/drm.h>
>>> @@ -66,6 +66,7 @@
>> Need to remove @mn documentation.
>>
>>>   * @objects: interval tree containing amdgpu_mn_nodes
>>>   * @read_lock: mutex for recursive locking of @lock
>>>   * @recursion: depth of recursion
>>> + * @mirror: HMM mirror function support
>>>   *
>>>   * Data for each amdgpu device and process address space.
>>>   */
>>> @@ -73,7 +74,6 @@ struct amdgpu_mn {
>>>      /* constant after initialisation */
>>>      struct amdgpu_device   *adev;
>>>      struct mm_struct   *mm;
>>> -   struct mmu_notifier   mn;
>>>      enum amdgpu_mn_type   type;
>>>
>>>      /* only used on destruction */
>>> @@ -87,6 +87,9 @@ struct amdgpu_mn {
>>>      struct rb_root_cached   objects;
>>>      struct mutex       read_lock;
>>>      atomic_t       recursion;
>>> +
>>> +   /* HMM mirror */
>>> +   struct hmm_mirror   mirror;
>>>  };
>>>
>>>  /**
>>> @@ -103,7 +106,7 @@ struct amdgpu_mn_node {
>>>  };
>>>
>>>  /**
>>> - * amdgpu_mn_destroy - destroy the MMU notifier
>>> + * amdgpu_mn_destroy - destroy the HMM mirror
>>>   *
>>>   * @work: previously sheduled work item
>>>   *
>>> @@ -129,28 +132,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
>>>      }
>>>      up_write(&amn->lock);
>>>      mutex_unlock(&adev->mn_lock);
>>> -   mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>>> +   hmm_mirror_unregister(&amn->mirror);
>>> +
>>>      kfree(amn);
>>>  }
>>>
>>>  /**
>>>   * amdgpu_mn_release - callback to notify about mm destruction
>> Update the function name in the comment.
>>
>>>   *
>>> - * @mn: our notifier
>>> - * @mm: the mm this callback is about
>>> + * @mirror: the HMM mirror (mm) this callback is about
>>>   *
>>> - * Shedule a work item to lazy destroy our notifier.
>>> + * Shedule a work item to lazy destroy HMM mirror.
>>>   */
>>> -static void amdgpu_mn_release(struct mmu_notifier *mn,
>>> -                 struct mm_struct *mm)
>>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>>>  {
>>> -   struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> +   struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
>>>
>>>      INIT_WORK(&amn->work, amdgpu_mn_destroy);
>>>      schedule_work(&amn->work);
>>>  }
>>> -
>>>  /**
>>>   * amdgpu_mn_lock - take the write side lock for this notifier
>>>   *
>>> @@ -237,21 +238,19 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
>>>  /**
>>>   * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
>>>   *
>>> - * @mn: our notifier
>>> - * @mm: the mm this callback is about
>>> - * @start: start of updated range
>>> - * @end: end of updated range
>>> + * @mirror: the hmm_mirror (mm) is about to update
>>> + * @update: the update start, end address
>>>   *
>>>   * Block for operations on BOs to finish and mark pages as accessed and
>>>   * potentially dirty.
>>>   */
>>> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>>> -                        struct mm_struct *mm,
>>> -                        unsigned long start,
>>> -                        unsigned long end,
>>> -                        bool blockable)
>>> +static int amdgpu_mn_invalidate_range_start_gfx(struct hmm_mirror *mirror,
>>> +           const struct hmm_update *update)
>>>  {
>>> -   struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> +   struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
>>> +   unsigned long start = update->start;
>>> +   unsigned long end = update->end;
>>> +   bool blockable = update->blockable;
>>>      struct interval_tree_node *it;
>>>
>>>      /* notification is exclusive, but interval is inclusive */
>>> @@ -278,28 +277,28 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>>>          amdgpu_mn_invalidate_node(node, start, end);
>>>      }
>>>
>>> +   amdgpu_mn_read_unlock(amn);
>>> +
>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>> overlapping or nested invalidation ranges. But if you're locking and
>> unlocking in the same function, is that still a concern?
> I don't understand the possible recursive case, but
> amdgpu_mn_read_lock() still supports recursive locking.
> Well the real problem is that unlocking them here won't work.
>
> We need to hold the lock until we are sure that the operation which
> updates the page tables is completed.
>
The reason for this change is that the HMM mirror has an invalidate_start
callback but no invalidate_end callback.

Checking mmu_notifier.c and hmm.c again, below is the entire logic to
update the CPU page tables and call back:

Old:
  1. down_read_non_owner(&amn->lock)
  2. loop to handle BOs from node->bos through interval tree amn->objects nodes
      gfx: wait for pending BO fence operations to finish, mark user pages dirty
      kfd: evict user queues of the process, wait for queue unmap/map operations to finish
  3. update CPU page tables
  4. up_read(&amn->lock)

New, with steps 3 and 4 swapped:
  1. down_read_non_owner(&amn->lock)
  2. loop to handle BOs from node->bos through interval tree amn->objects nodes (same as above)
  3. up_read(&amn->lock)
  4. update CPU page tables

amn->lock is used to protect interval tree access because the user may
submit/register a new userptr at any time; this is the same for the old
and the new way. Step 2 guarantees that the GPU operation is done before
the CPU page tables are updated, so I think the change is safe: we don't
need to hold the mn lock until the CPU page table update is completed.
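To make the new ordering concrete, here is a minimal sketch of the
callback under the new scheme (illustrative only: the function name is
made up and the interval tree walk is elided; the real code is in the
patch above):

static int sketch_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
                                             const struct hmm_update *update)
{
        struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);

        /* step 1: take the read side of amn->lock so no new userptr
         * can be registered while we run
         */
        if (amdgpu_mn_read_lock(amn, update->blockable))
                return -EAGAIN;

        /* step 2: walk amn->objects for BOs overlapping
         * [update->start, update->end] and wait until the GPU is done
         * with them (gfx: wait on fences; kfd: evict user queues)
         */

        /* step 3: drop the lock; the GPU can no longer touch the pages */
        amdgpu_mn_read_unlock(amn);

        /* step 4 happens in the HMM core only after we return: the CPU
         * page tables are updated once this callback has completed
         */
        return 0;
}

The old notifier needed the invalidate_range_end callback to drop the
lock because the CPU page table update happens between range_start and
range_end; with HMM there is only the one callback, so the unlock moves
inside it.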
Regards,
Philip

> Christian.
>
>>
>>>      return 0;
>>>  }
>>>
>>>  /**
>>>   * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
>>>   *
>>> - * @mn: our notifier
>>> - * @mm: the mm this callback is about
>>> - * @start: start of updated range
>>> - * @end: end of updated range
>>> + * @mirror: the hmm_mirror (mm) is about to update
>>> + * @update: the update start, end address
>>>   *
>>>   * We temporarily evict all BOs between start and end. This
>>>   * necessitates evicting all user-mode queues of the process. The BOs
>>>   * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>>>   */
>>> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>>> -                        struct mm_struct *mm,
>>> -                        unsigned long start,
>>> -                        unsigned long end,
>>> -                        bool blockable)
>>> +static int amdgpu_mn_invalidate_range_start_hsa(struct hmm_mirror *mirror,
>>> +           const struct hmm_update *update)
>>>  {
>>> -   struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> +   struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
>>> +   unsigned long start = update->start;
>>> +   unsigned long end = update->end;
>>> +   bool blockable = update->blockable;
>>>      struct interval_tree_node *it;
>>>
>>>      /* notification is exclusive, but interval is inclusive */
>>> @@ -326,59 +325,41 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>>>
>>>              if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>>>                              start, end))
>>> -               amdgpu_amdkfd_evict_userptr(mem, mm);
>>> +               amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>>>          }
>>>      }
>>>
>>> +   amdgpu_mn_read_unlock(amn);
>>> +
>>>      return 0;
>>>  }
>>>
>>> -/**
>>> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
>>> - *
>>> - * @mn: our notifier
>>> - * @mm: the mm this callback is about
>>> - * @start: start of updated range
>>> - * @end: end of updated range
>>> - *
>>> - * Release the lock again to allow new command submissions.
>>> +/* Low bits of any reasonable mm pointer will be unused due to struct
>>> + * alignment. Use these bits to make a unique key from the mm pointer
>>> + * and notifier type.
>>>   */
>>> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>>> -                      struct mm_struct *mm,
>>> -                      unsigned long start,
>>> -                      unsigned long end)
>>> -{
>>> -   struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> -
>>> -   amdgpu_mn_read_unlock(amn);
>>> -}
>>> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>>
>>> -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>>>      [AMDGPU_MN_TYPE_GFX] = {
>>> -       .release = amdgpu_mn_release,
>>> -       .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
>>> -       .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>> +       .sync_cpu_device_pagetables =
>>> +               amdgpu_mn_invalidate_range_start_gfx,
>>> +       .release = amdgpu_hmm_mirror_release
>>>      },
>>>      [AMDGPU_MN_TYPE_HSA] = {
>>> -       .release = amdgpu_mn_release,
>>> -       .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
>>> -       .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>> +       .sync_cpu_device_pagetables =
>>> +               amdgpu_mn_invalidate_range_start_hsa,
>>> +       .release = amdgpu_hmm_mirror_release
>>>      },
>>>  };
>>>
>>> -/* Low bits of any reasonable mm pointer will be unused due to struct
>>> - * alignment. Use these bits to make a unique key from the mm pointer
>>> - * and notifier type.
>>> - */
>>> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>> -
>>>  /**
>>> - * amdgpu_mn_get - create notifier context
>>> + * amdgpu_mn_get - create HMM mirror context
>>>   *
>>>   * @adev: amdgpu device pointer
>>>   * @type: type of MMU notifier context
>>>   *
>>> - * Creates a notifier context for current->mm.
>>> + * Creates a HMM mirror context for current->mm.
>>>   */
>>>  struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>                  enum amdgpu_mn_type type)
>>> @@ -408,12 +389,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>      amn->mm = mm;
>>>      init_rwsem(&amn->lock);
>>>      amn->type = type;
>>> -   amn->mn.ops = &amdgpu_mn_ops[type];
>>>      amn->objects = RB_ROOT_CACHED;
>>>      mutex_init(&amn->read_lock);
>>>      atomic_set(&amn->recursion, 0);
>>>
>>> -   r = __mmu_notifier_register(&amn->mn, mm);
>>> +   amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
>>> +   r = hmm_mirror_register(&amn->mirror, mm);
>>>      if (r)
>>>          goto free_amn;
>>>
>>> @@ -439,7 +420,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>   * @bo: amdgpu buffer object
>>>   * @addr: userptr addr we should monitor
>>>   *
>>> - * Registers an MMU notifier for the given BO at the specified address.
>>> + * Registers an HMM mirror for the given BO at the specified address.
>>>   * Returns 0 on success, -ERRNO if anything goes wrong.
>>>   */
>>>  int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>> @@ -495,11 +476,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>>  }
>>>
>>>  /**
>>> - * amdgpu_mn_unregister - unregister a BO for notifier updates
>>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>>>   *
>>>   * @bo: amdgpu buffer object
>>>   *
>>> - * Remove any registration of MMU notifier updates from the buffer object.
>>> + * Remove any registration of HMM mirror updates from the buffer object.
>>>   */
>>>  void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>>  {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> index eb0f432..0e27526 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>>>      AMDGPU_MN_TYPE_HSA,
>>>  };
>>>
>>> -#if defined(CONFIG_MMU_NOTIFIER)
>>> +#if defined(CONFIG_HMM)
>>>  void amdgpu_mn_lock(struct amdgpu_mn *mn);
>>>  void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>>>  struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>