On 2018-09-14 07:47 PM, Philip Yang wrote:
> On 2018-09-14 03:51 AM, Christian König wrote:
>> On 2018-09-13 11:51 PM, Felix Kuehling wrote:
>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>> Replace our MMU notifier with
>>>> hmm_mirror_ops.sync_cpu_device_pagetables
>>>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
>>>> DRM_AMDGPU_USERPTR Kconfig.
>>>>
>>>> It supports both KFD userptr and gfx userptr paths.
>>>>
>>>> This depends on several HMM patchset from Jérôme Glisse queued for
>>>> upstream.
>>>>
>>>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>>>> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
>>>> ---
>>>>  drivers/gpu/drm/amd/amdgpu/Kconfig    |  6 +-
>>>>  drivers/gpu/drm/amd/amdgpu/Makefile   |  2 +-
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 121
>>>> ++++++++++++++-------------------
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |  2 +-
>>>>  4 files changed, 56 insertions(+), 75 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig
>>>> b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>>> index 9221e54..960a633 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>>>>  config DRM_AMDGPU_USERPTR
>>>>      bool "Always enable userptr write support"
>>>>      depends on DRM_AMDGPU
>>>> -   select MMU_NOTIFIER
>>>> +   select HMM_MIRROR
>>>>      help
>>>> -     This option selects CONFIG_MMU_NOTIFIER if it isn't already
>>>> -     selected to enabled full userptr support.
>>>> +     This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
>>>> +     isn't already selected to enabled full userptr support.
>>>>   config DRM_AMDGPU_GART_DEBUGFS
>>>>      bool "Allow GART access through debugfs"
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>> index 138cb78..c1e5d43 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>> @@ -171,7 +171,7 @@ endif
>>>>  amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>>>>  amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>>>>  amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>>>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>>>> +amdgpu-$(CONFIG_HMM) += amdgpu_mn.o
>>>>   include $(FULL_AMD_PATH)/powerplay/Makefile
>>>>  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>>> index e55508b..ad52f34 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>>> @@ -45,7 +45,7 @@
>>>>   #include <linux/firmware.h>
>>>>  #include <linux/module.h>
>>>> -#include <linux/mmu_notifier.h>
>>>> +#include <linux/hmm.h>
>>>>  #include <linux/interval_tree.h>
>>>>  #include <drm/drmP.h>
>>>>  #include <drm/drm.h>
>>>> @@ -66,6 +66,7 @@
>>> Need to remove @mn documentation.
>>>
>>>>   * @objects: interval tree containing amdgpu_mn_nodes
>>>>   * @read_lock: mutex for recursive locking of @lock
>>>>   * @recursion: depth of recursion
>>>> + * @mirror: HMM mirror function support
>>>>   *
>>>>   * Data for each amdgpu device and process address space.
>>>>   */
>>>> @@ -73,7 +74,6 @@ struct amdgpu_mn {
>>>>      /* constant after initialisation */
>>>>      struct amdgpu_device   *adev;
>>>>      struct mm_struct   *mm;
>>>> -   struct mmu_notifier   mn;
>>>>      enum amdgpu_mn_type   type;
>>>>       /* only used on destruction */
>>>> @@ -87,6 +87,9 @@ struct amdgpu_mn {
>>>>      struct rb_root_cached   objects;
>>>>      struct mutex       read_lock;
>>>>      atomic_t       recursion;
>>>> +
>>>> +   /* HMM mirror */
>>>> +   struct hmm_mirror   mirror;
>>>>  };
>>>>   /**
>>>> @@ -103,7 +106,7 @@ struct amdgpu_mn_node {
>>>>  };
>>>>   /**
>>>> - * amdgpu_mn_destroy - destroy the MMU notifier
>>>> + * amdgpu_mn_destroy - destroy the HMM mirror
>>>>   *
>>>>   * @work: previously sheduled work item
>>>>   *
>>>> @@ -129,28 +132,26 @@ static void amdgpu_mn_destroy(struct
>>>> work_struct *work)
>>>>      }
>>>>      up_write(&amn->lock);
>>>>      mutex_unlock(&adev->mn_lock);
>>>> -   mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>>>> +   hmm_mirror_unregister(&amn->mirror);
>>>> +
>>>>      kfree(amn);
>>>>  }
>>>>   /**
>>>>   * amdgpu_mn_release - callback to notify about mm destruction
>>> Update the function name in the comment.
>>>
>>>>   *
>>>> - * @mn: our notifier
>>>> - * @mm: the mm this callback is about
>>>> + * @mirror: the HMM mirror (mm) this callback is about
>>>>   *
>>>> - * Shedule a work item to lazy destroy our notifier.
>>>> + * Shedule a work item to lazy destroy HMM mirror.
>>>> -static void amdgpu_mn_release(struct mmu_notifier *mn,
>>>> -                 struct mm_struct *mm)
>>>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>>>>  {
>>>> -   struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>> +   struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
>>>> mirror);
>>>>       INIT_WORK(&amn->work, amdgpu_mn_destroy);
>>>>      schedule_work(&amn->work);
>>>>  }
>>>>  -
>>>>  /**
>>>>   * amdgpu_mn_lock - take the write side lock for this notifier
>>>>   *
>>>> @@ -237,21 +238,19 @@ static void amdgpu_mn_invalidate_node(struct
>>>> amdgpu_mn_node *node,
>>>>  /**
>>>>   * amdgpu_mn_invalidate_range_start_gfx - callback to notify
>>>> about mm change
>>>>   *
>>>> - * @mn: our notifier
>>>> - * @mm: the mm this callback is about
>>>> - * @start: start of updated range
>>>> - * @end: end of updated range
>>>> + * @mirror: the hmm_mirror (mm) is about to update
>>>> + * @update: the update start, end address
>>>>   *
>>>>   * Block for operations on BOs to finish and mark pages as
>>>> accessed and
>>>>   * potentially dirty.
>>>>   */
>>>> -static int amdgpu_mn_invalidate_range_start_gfx(struct
>>>> mmu_notifier *mn,
>>>> -                        struct mm_struct *mm,
>>>> -                        unsigned long start,
>>>> -                        unsigned long end,
>>>> -                        bool blockable)
>>>> +static int amdgpu_mn_invalidate_range_start_gfx(struct hmm_mirror
>>>> *mirror,
>>>> +           const struct hmm_update *update)
>>>>  {
>>>> -   struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>> +   struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
>>>> mirror);
>>>> +   unsigned long start = update->start;
>>>> +   unsigned long end = update->end;
>>>> +   bool blockable = update->blockable;
>>>>      struct interval_tree_node *it;
>>>>       /* notification is exclusive, but interval is inclusive */
>>>> @@ -278,28 +277,28 @@ static int
>>>> amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>>>>          amdgpu_mn_invalidate_node(node, start, end);
>>>>      }
>>>>  +   amdgpu_mn_read_unlock(amn);
>>>> +
>>> amdgpu_mn_read_lock/unlock supports recursive locking for multiple
>>> overlapping or nested invalidation ranges. But if you're locking and
>>> unlocking in the same function, is that still a concern?
>>
> I don't understand the possible recursive case, but
> amdgpu_mn_read_lock() still supports recursive locking.
>> Well the real problem is that unlocking them here won't work.
>>
>> We need to hold the lock until we are sure that the operation which
>> updates the page tables is completed.
>>
> The reason for this change is that the hmm mirror has an
> invalidate_start callback, but no invalidate_end callback.
>
> Checking mmu_notifier.c and hmm.c again, below is the entire logic to
> update the CPU page tables and invoke the callback:
>
> The mn lock amn->lock is used to protect interval tree access because
> the user may submit/register a new userptr at any time.
> This is the same for the old and the new way.
>
> Step 2 guarantees the GPU operation is done before updating the CPU
> page table.
>
> So I think the change is safe. We don't need to hold the mn lock until
> the CPU page table update is completed.

No, that isn't even remotely correct. The lock doesn't protect the
interval tree.

>
> Old:
>   1. down_read_non_owner(&amn->lock)
>   2. loop to handle BOs from node->bos through the interval tree
> amn->objects nodes
>       gfx: wait for pending BO fence operations to finish, mark user
> pages dirty
>       kfd: evict user queues of the process, wait for the queue
> unmap/map operations to finish
>   3. update CPU page tables
>   4. up_read(&amn->lock)
>
> New, with steps 3 and 4 switched:
>   1. down_read_non_owner(&amn->lock)
>   2. loop to handle BOs from node->bos through the interval tree
> amn->objects nodes
>       gfx: wait for pending BO fence operations to finish, mark user
> pages dirty
>       kfd: evict user queues of the process, wait for the queue
> unmap/map operations to finish
>   3. up_read(&amn->lock)
>   4. update CPU page tables

The lock is there to make sure that we serialize page table updates
with command submission. If HMM doesn't provide a callback for the end
of the invalidation, then it can't be used for this.

Adding Jerome as well, since we are certainly missing something here.

Regards,
Christian.

>
> Regards,
> Philip
>> Christian.
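To make the window concrete, here is a minimal userspace sketch of the
ordering question (illustrative only, not driver code: a pthread rwlock
stands in for amn->lock, the main thread plays the MM invalidating a range,
and the worker thread plays amdgpu_cs taking amdgpu_mn_lock(); all names
below are made up for the example):

/* ordering_sketch.c - build with: gcc -pthread ordering_sketch.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static pthread_rwlock_t amn_lock = PTHREAD_RWLOCK_INITIALIZER;
static atomic_int cpu_ptes_updated;  /* has the MM rewritten its PTEs yet? */

/* Stand-in for amdgpu_cs: take the write side, then use the userptr pages. */
static void *command_submission(void *arg)
{
	(void)arg;
	pthread_rwlock_wrlock(&amn_lock);          /* amdgpu_mn_lock()   */
	if (!atomic_load(&cpu_ptes_updated))
		printf("CS ran before the CPU page table update - stale pages\n");
	else
		printf("CS ran after the CPU page table update\n");
	pthread_rwlock_unlock(&amn_lock);          /* amdgpu_mn_unlock() */
	return NULL;
}

int main(void)
{
	pthread_t cs;

	/* "New" ordering from the list above: the read side is dropped when
	 * the sync_cpu_device_pagetables() callback returns, i.e. before
	 * the MM actually updates the CPU page table. */
	pthread_rwlock_rdlock(&amn_lock);          /* step 1 */
	/* step 2: wait for fences / evict user queues (elided) */
	pthread_rwlock_unlock(&amn_lock);          /* step 3 */

	pthread_create(&cs, NULL, command_submission, NULL);
	sleep(1);                  /* the window a concurrent CS can use */
	atomic_store(&cpu_ptes_updated, 1);        /* step 4 */

	pthread_join(cs, NULL);
	return 0;
}

With the old mmu_notifier flow the read side is only dropped in
invalidate_range_end, i.e. after the CPU page table update, so the write
side taken by command submission cannot be acquired inside that window;
that is the serialization the read/write lock is meant to provide.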
>>
>>>
>>>>      return 0;
>>>>  }
>>>>   /**
>>>>   * amdgpu_mn_invalidate_range_start_hsa - callback to notify
>>>> about mm change
>>>>   *
>>>> - * @mn: our notifier
>>>> - * @mm: the mm this callback is about
>>>> - * @start: start of updated range
>>>> - * @end: end of updated range
>>>> + * @mirror: the hmm_mirror (mm) is about to update
>>>> + * @update: the update start, end address
>>>>   *
>>>>   * We temporarily evict all BOs between start and end. This
>>>>   * necessitates evicting all user-mode queues of the process. The
>>>> BOs
>>>>   * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>>>>   */
>>>> -static int amdgpu_mn_invalidate_range_start_hsa(struct
>>>> mmu_notifier *mn,
>>>> -                        struct mm_struct *mm,
>>>> -                        unsigned long start,
>>>> -                        unsigned long end,
>>>> -                        bool blockable)
>>>> +static int amdgpu_mn_invalidate_range_start_hsa(struct hmm_mirror
>>>> *mirror,
>>>> +           const struct hmm_update *update)
>>>>  {
>>>> -   struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>> +   struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
>>>> mirror);
>>>> +   unsigned long start = update->start;
>>>> +   unsigned long end = update->end;
>>>> +   bool blockable = update->blockable;
>>>>      struct interval_tree_node *it;
>>>>       /* notification is exclusive, but interval is inclusive */
>>>> @@ -326,59 +325,41 @@ static int
>>>> amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>>>>               if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>>>>                               start, end))
>>>> -               amdgpu_amdkfd_evict_userptr(mem, mm);
>>>> +               amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>>>>          }
>>>>      }
>>>>  +   amdgpu_mn_read_unlock(amn);
>>>> +
>>>>      return 0;
>>>>  }
>>>>  -/**
>>>> - * amdgpu_mn_invalidate_range_end - callback to notify about mm
>>>> change
>>>> - *
>>>> - * @mn: our notifier
>>>> - * @mm: the mm this callback is about
>>>> - * @start: start of updated range
>>>> - * @end: end of updated range
>>>> - *
>>>> - * Release the lock again to allow new command submissions.
>>>> +/* Low bits of any reasonable mm pointer will be unused due to struct
>>>> + * alignment. Use these bits to make a unique key from the mm pointer
>>>> + * and notifier type.
>>>>   */
>>>> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>>>> -                      struct mm_struct *mm,
>>>> -                      unsigned long start,
>>>> -                      unsigned long end)
>>>> -{
>>>> -   struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>> -
>>>> -   amdgpu_mn_read_unlock(amn);
>>>> -}
>>>> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>>>  -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>>>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>>>>      [AMDGPU_MN_TYPE_GFX] = {
>>>> -       .release = amdgpu_mn_release,
>>>> -       .invalidate_range_start =
>>>> amdgpu_mn_invalidate_range_start_gfx,
>>>> -       .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>>> +       .sync_cpu_device_pagetables =
>>>> +               amdgpu_mn_invalidate_range_start_gfx,
>>>> +       .release = amdgpu_hmm_mirror_release
>>>>      },
>>>>      [AMDGPU_MN_TYPE_HSA] = {
>>>> -       .release = amdgpu_mn_release,
>>>> -       .invalidate_range_start =
>>>> amdgpu_mn_invalidate_range_start_hsa,
>>>> -       .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>>> +       .sync_cpu_device_pagetables =
>>>> +               amdgpu_mn_invalidate_range_start_hsa,
>>>> +       .release = amdgpu_hmm_mirror_release
>>>>      },
>>>>  };
>>>>  -/* Low bits of any reasonable mm pointer will be unused due to
>>>> struct
>>>> - * alignment. Use these bits to make a unique key from the mm pointer
>>>> - * and notifier type.
>>>> - */
>>>> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>>> -
>>>>  /**
>>>> - * amdgpu_mn_get - create notifier context
>>>> + * amdgpu_mn_get - create HMM mirror context
>>>>   *
>>>>   * @adev: amdgpu device pointer
>>>>   * @type: type of MMU notifier context
>>>>   *
>>>> - * Creates a notifier context for current->mm.
>>>> + * Creates a HMM mirror context for current->mm.
>>>>   */
>>>>  struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>>                  enum amdgpu_mn_type type)
>>>> @@ -408,12 +389,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct
>>>> amdgpu_device *adev,
>>>>      amn->mm = mm;
>>>>      init_rwsem(&amn->lock);
>>>>      amn->type = type;
>>>> -   amn->mn.ops = &amdgpu_mn_ops[type];
>>>>      amn->objects = RB_ROOT_CACHED;
>>>>      mutex_init(&amn->read_lock);
>>>>      atomic_set(&amn->recursion, 0);
>>>>  -   r = __mmu_notifier_register(&amn->mn, mm);
>>>> +   amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
>>>> +   r = hmm_mirror_register(&amn->mirror, mm);
>>>>      if (r)
>>>>          goto free_amn;
>>>>  @@ -439,7 +420,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct
>>>> amdgpu_device *adev,
>>>>   * @bo: amdgpu buffer object
>>>>   * @addr: userptr addr we should monitor
>>>>   *
>>>> - * Registers an MMU notifier for the given BO at the specified
>>>> address.
>>>> + * Registers an HMM mirror for the given BO at the specified address.
>>>>   * Returns 0 on success, -ERRNO if anything goes wrong.
>>>>   */
>>>>  int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>>> @@ -495,11 +476,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo,
>>>> unsigned long addr)
>>>>  }
>>>>   /**
>>>> - * amdgpu_mn_unregister - unregister a BO for notifier updates
>>>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>>>>   *
>>>>   * @bo: amdgpu buffer object
>>>>   *
>>>> - * Remove any registration of MMU notifier updates from the buffer
>>>> object.
>>>> + * Remove any registration of HMM mirror updates from the buffer
>>>> object.
>>>>   */
>>>>  void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>>>  {
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>>> index eb0f432..0e27526 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>>> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>>>>      AMDGPU_MN_TYPE_HSA,
>>>>  };
>>>>  -#if defined(CONFIG_MMU_NOTIFIER)
>>>> +#if defined(CONFIG_HMM)
>>>>  void amdgpu_mn_lock(struct amdgpu_mn *mn);
>>>>  void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>>>>  struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>