On 2018-09-14 01:52 PM, Christian König wrote: > Am 14.09.2018 um 19:47 schrieb Philip Yang: >> On 2018-09-14 03:51 AM, Christian König wrote: >>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling: >>>> On 2018-09-13 04:52 PM, Philip Yang wrote: >>>>> Replace our MMU notifier with >>>>> hmm_mirror_ops.sync_cpu_device_pagetables >>>>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in >>>>> DRM_AMDGPU_USERPTR Kconfig. >>>>> >>>>> It supports both KFD userptr and gfx userptr paths. >>>>> >>>>> This depends on several HMM patchset from Jérôme Glisse queued for >>>>> upstream. >>>>> >>>>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e >>>>> Signed-off-by: Philip Yang <Philip.Yang at amd.com> >>>>> --- >>>>>  drivers/gpu/drm/amd/amdgpu/Kconfig    |  6 +- >>>>>  drivers/gpu/drm/amd/amdgpu/Makefile   |  2 +- >>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 121 >>>>> ++++++++++++++------------------- >>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |  2 +- >>>>>  4 files changed, 56 insertions(+), 75 deletions(-) >>>>> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig >>>>> b/drivers/gpu/drm/amd/amdgpu/Kconfig >>>>> index 9221e54..960a633 100644 >>>>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig >>>>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK >>>>>  config DRM_AMDGPU_USERPTR >>>>>      bool "Always enable userptr write support" >>>>>      depends on DRM_AMDGPU >>>>> -   select MMU_NOTIFIER >>>>> +   select HMM_MIRROR >>>>>      help >>>>> -     This option selects CONFIG_MMU_NOTIFIER if it isn't already >>>>> -     selected to enabled full userptr support. >>>>> +     This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it >>>>> +     isn't already selected to enabled full userptr support. >>>>>   config DRM_AMDGPU_GART_DEBUGFS >>>>>      bool "Allow GART access through debugfs" >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile >>>>> b/drivers/gpu/drm/amd/amdgpu/Makefile >>>>> index 138cb78..c1e5d43 100644 >>>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile >>>>> @@ -171,7 +171,7 @@ endif >>>>>  amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o >>>>>  amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o >>>>>  amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o >>>>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o >>>>> +amdgpu-$(CONFIG_HMM) += amdgpu_mn.o >>>>>   include $(FULL_AMD_PATH)/powerplay/Makefile >>>>>  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c >>>>> index e55508b..ad52f34 100644 >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c >>>>> @@ -45,7 +45,7 @@ >>>>>   #include <linux/firmware.h> >>>>>  #include <linux/module.h> >>>>> -#include <linux/mmu_notifier.h> >>>>> +#include <linux/hmm.h> >>>>>  #include <linux/interval_tree.h> >>>>>  #include <drm/drmP.h> >>>>>  #include <drm/drm.h> >>>>> @@ -66,6 +66,7 @@ >>>> Need to remove @mn documentation. >>>> >>>>>   * @objects: interval tree containing amdgpu_mn_nodes >>>>>   * @read_lock: mutex for recursive locking of @lock >>>>>   * @recursion: depth of recursion >>>>> + * @mirror: HMM mirror function support >>>>>   * >>>>>   * Data for each amdgpu device and process address space. 
>>>>>   */ >>>>> @@ -73,7 +74,6 @@ struct amdgpu_mn { >>>>>      /* constant after initialisation */ >>>>>      struct amdgpu_device   *adev; >>>>>      struct mm_struct   *mm; >>>>> -   struct mmu_notifier   mn; >>>>>      enum amdgpu_mn_type   type; >>>>>       /* only used on destruction */ >>>>> @@ -87,6 +87,9 @@ struct amdgpu_mn { >>>>>      struct rb_root_cached   objects; >>>>>      struct mutex       read_lock; >>>>>      atomic_t       recursion; >>>>> + >>>>> +   /* HMM mirror */ >>>>> +   struct hmm_mirror   mirror; >>>>>  }; >>>>>   /** >>>>> @@ -103,7 +106,7 @@ struct amdgpu_mn_node { >>>>>  }; >>>>>   /** >>>>> - * amdgpu_mn_destroy - destroy the MMU notifier >>>>> + * amdgpu_mn_destroy - destroy the HMM mirror >>>>>   * >>>>>   * @work: previously sheduled work item >>>>>   * >>>>> @@ -129,28 +132,26 @@ static void amdgpu_mn_destroy(struct >>>>> work_struct *work) >>>>>      } >>>>>      up_write(&amn->lock); >>>>>      mutex_unlock(&adev->mn_lock); >>>>> -   mmu_notifier_unregister_no_release(&amn->mn, amn->mm); >>>>> +   hmm_mirror_unregister(&amn->mirror); >>>>> + >>>>>      kfree(amn); >>>>>  } >>>>>   /** >>>>>   * amdgpu_mn_release - callback to notify about mm destruction >>>> Update the function name in the comment. >>>> >>>>>   * >>>>> - * @mn: our notifier >>>>> - * @mm: the mm this callback is about >>>>> + * @mirror: the HMM mirror (mm) this callback is about >>>>>   * >>>>> - * Shedule a work item to lazy destroy our notifier. >>>>> + * Shedule a work item to lazy destroy HMM mirror. >>>>>   */ >>>>> -static void amdgpu_mn_release(struct mmu_notifier *mn, >>>>> -                 struct mm_struct *mm) >>>>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror) >>>>>  { >>>>> -   struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); >>>>> +   struct amdgpu_mn *amn = container_of(mirror, struct >>>>> amdgpu_mn, mirror); >>>>>       INIT_WORK(&amn->work, amdgpu_mn_destroy); >>>>>      schedule_work(&amn->work); >>>>>  } >>>>>  - >>>>>  /** >>>>>   * amdgpu_mn_lock - take the write side lock for this notifier >>>>>   * >>>>> @@ -237,21 +238,19 @@ static void amdgpu_mn_invalidate_node(struct >>>>> amdgpu_mn_node *node, >>>>>  /** >>>>>   * amdgpu_mn_invalidate_range_start_gfx - callback to notify >>>>> about mm change >>>>>   * >>>>> - * @mn: our notifier >>>>> - * @mm: the mm this callback is about >>>>> - * @start: start of updated range >>>>> - * @end: end of updated range >>>>> + * @mirror: the hmm_mirror (mm) is about to update >>>>> + * @update: the update start, end address >>>>>   * >>>>>   * Block for operations on BOs to finish and mark pages as >>>>> accessed and >>>>>   * potentially dirty. 
>>>>>   */ >>>>> -static int amdgpu_mn_invalidate_range_start_gfx(struct >>>>> mmu_notifier *mn, >>>>> -                        struct mm_struct *mm, >>>>> -                        unsigned long start, >>>>> -                        unsigned long end, >>>>> -                        bool blockable) >>>>> +static int amdgpu_mn_invalidate_range_start_gfx(struct hmm_mirror >>>>> *mirror, >>>>> +           const struct hmm_update *update) >>>>>  { >>>>> -   struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); >>>>> +   struct amdgpu_mn *amn = container_of(mirror, struct >>>>> amdgpu_mn, mirror); >>>>> +   unsigned long start = update->start; >>>>> +   unsigned long end = update->end; >>>>> +   bool blockable = update->blockable; >>>>>      struct interval_tree_node *it; >>>>>       /* notification is exclusive, but interval is inclusive */ >>>>> @@ -278,28 +277,28 @@ static int >>>>> amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, >>>>>          amdgpu_mn_invalidate_node(node, start, end); >>>>>      } >>>>>  +   amdgpu_mn_read_unlock(amn); >>>>> + >>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple >>>> overlapping or nested invalidation ranges. But if you're locking and >>>> unlocking in the same function, is that still a concern? >>> >> I don't understand the possible recursive case, but >> amdgpu_mn_read_lock() still supports recursive locking. >>> Well the real problem is that unlocking them here won't work. >>> >>> We need to hold the lock until we are sure that the operation which >>> updates the page tables is completed. >>> >> The reason for this change is that hmm mirror has an invalidate_start >> callback but no invalidate_end callback. >> >> Check mmu_notifier.c and hmm.c again; below is the entire logic to update >> CPU page tables and the callbacks: >> >> The mn lock amn->lock is used to protect interval tree access because >> the user may submit/register new userptrs at any time. >> This is the same for the old and the new way. >> >> Step 2 guarantees the GPU operation is done before the CPU page >> tables are updated. >> >> So I think the change is safe. We don't need to hold the mn lock until the >> CPU page table update is completed. > > No, that isn't even remotely correct. The lock doesn't protect the > interval tree. > >> >> Old: >>   1. down_read_non_owner(&amn->lock) >>   2. loop to handle BOs from node->bos through interval tree >> amn->object nodes >>       gfx: wait for pending BO fence operations to finish, mark user >> pages dirty >>       kfd: evict user queues of the process, wait for queue >> unmap/map operations to finish >>   3. update CPU page tables >>   4. up_read(&amn->lock) >> >> New, with steps 3 and 4 switched: >>   1. down_read_non_owner(&amn->lock) >>   2. loop to handle BOs from node->bos through interval tree >> amn->object nodes >>       gfx: wait for pending BO fence operations to finish, mark user >> pages dirty >>       kfd: evict user queues of the process, wait for queue >> unmap/map operations to finish >>   3. up_read(&amn->lock) >>   4. update CPU page tables > > The lock is there to make sure that we serialize page table updates > with command submission. As I understand it, the idea is to prevent command submission (adding new fences to BOs) while a page table invalidation is in progress. But do we really need another lock for this? Wouldn't the re-validation of userptr BOs (currently calling get_user_pages) force synchronization with the ongoing page table invalidation through the mmap_sem or other MM locks?
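To make the two orderings concrete, here is a rough sketch of the flows being discussed. This is simplified, hand-written pseudo-kernel code, not the actual patch: the old_/new_ function names are made up for illustration, the read-lock helpers are only an approximation of what amdgpu_mn.c does today, and error handling is elided.

static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
{
	/* Recursive read lock: only the outermost notification takes
	 * amn->lock for reading, nested ones just bump the counter. */
	if (blockable)
		mutex_lock(&amn->read_lock);
	else if (!mutex_trylock(&amn->read_lock))
		return -EAGAIN;

	if (atomic_inc_return(&amn->recursion) == 1)
		down_read_non_owner(&amn->lock);
	mutex_unlock(&amn->read_lock);

	return 0;
}

static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
{
	if (atomic_dec_return(&amn->recursion) == 0)
		up_read_non_owner(&amn->lock);
}

/* Old mmu_notifier flow: the core mm calls both callbacks, so the read
 * side of amn->lock stays held while the CPU page tables are updated. */
static int old_invalidate_range_start(struct mmu_notifier *mn,
				      struct mm_struct *mm,
				      unsigned long start,
				      unsigned long end,
				      bool blockable)
{
	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);

	if (amdgpu_mn_read_lock(amn, blockable))		/* step 1 */
		return -EAGAIN;

	/* step 2: wait for BO fences / evict user queues */

	/* the core mm updates the CPU page tables next (step 3),
	 * with the read lock still held */
	return 0;
}

static void old_invalidate_range_end(struct mmu_notifier *mn,
				     struct mm_struct *mm,
				     unsigned long start,
				     unsigned long end)
{
	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);

	amdgpu_mn_read_unlock(amn);				/* step 4 */
}

/* New HMM mirror flow: there is only this one callback, so the lock is
 * dropped before the core mm updates the CPU page tables. */
static int new_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
					  const struct hmm_update *update)
{
	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);

	if (amdgpu_mn_read_lock(amn, update->blockable))	/* step 1 */
		return -EAGAIN;

	/* step 2: wait for BO fences / evict user queues */

	amdgpu_mn_read_unlock(amn);				/* step 3 */

	/* the CPU page tables are updated after we return (step 4),
	 * with amn->lock no longer held */
	return 0;
}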
Regards,  Felix > > If HMM doesn't provide a callback for the end of the invalidating then > it can't be used for this. > > Adding Jerome as well, since we are certainly missing something here. > > Regards, > Christian. > >> >> Regards, >> Philip >>> Christian. >>> >>>> >>>>>      return 0; >>>>>  } >>>>>   /** >>>>>   * amdgpu_mn_invalidate_range_start_hsa - callback to notify >>>>> about mm change >>>>>   * >>>>> - * @mn: our notifier >>>>> - * @mm: the mm this callback is about >>>>> - * @start: start of updated range >>>>> - * @end: end of updated range >>>>> + * @mirror: the hmm_mirror (mm) is about to update >>>>> + * @update: the update start, end address >>>>>   * >>>>>   * We temporarily evict all BOs between start and end. This >>>>>   * necessitates evicting all user-mode queues of the process. >>>>> The BOs >>>>>   * are restorted in amdgpu_mn_invalidate_range_end_hsa. >>>>>   */ >>>>> -static int amdgpu_mn_invalidate_range_start_hsa(struct >>>>> mmu_notifier *mn, >>>>> -                        struct mm_struct *mm, >>>>> -                        unsigned long start, >>>>> -                        unsigned long end, >>>>> -                        bool blockable) >>>>> +static int amdgpu_mn_invalidate_range_start_hsa(struct hmm_mirror >>>>> *mirror, >>>>> +           const struct hmm_update *update) >>>>>  { >>>>> -   struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); >>>>> +   struct amdgpu_mn *amn = container_of(mirror, struct >>>>> amdgpu_mn, mirror); >>>>> +   unsigned long start = update->start; >>>>> +   unsigned long end = update->end; >>>>> +   bool blockable = update->blockable; >>>>>      struct interval_tree_node *it; >>>>>       /* notification is exclusive, but interval is inclusive */ >>>>> @@ -326,59 +325,41 @@ static int >>>>> amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, >>>>>               if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, >>>>>                               start, end)) >>>>> -               amdgpu_amdkfd_evict_userptr(mem, mm); >>>>> +               amdgpu_amdkfd_evict_userptr(mem, amn->mm); >>>>>          } >>>>>      } >>>>>  +   amdgpu_mn_read_unlock(amn); >>>>> + >>>>>      return 0; >>>>>  } >>>>>  -/** >>>>> - * amdgpu_mn_invalidate_range_end - callback to notify about mm >>>>> change >>>>> - * >>>>> - * @mn: our notifier >>>>> - * @mm: the mm this callback is about >>>>> - * @start: start of updated range >>>>> - * @end: end of updated range >>>>> - * >>>>> - * Release the lock again to allow new command submissions. >>>>> +/* Low bits of any reasonable mm pointer will be unused due to >>>>> struct >>>>> + * alignment. Use these bits to make a unique key from the mm >>>>> pointer >>>>> + * and notifier type. 
>>>>>   */ >>>>> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, >>>>> -                      struct mm_struct *mm, >>>>> -                      unsigned long start, >>>>> -                      unsigned long end) >>>>> -{ >>>>> -   struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); >>>>> - >>>>> -   amdgpu_mn_read_unlock(amn); >>>>> -} >>>>> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) >>>>>  -static const struct mmu_notifier_ops amdgpu_mn_ops[] = { >>>>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = { >>>>>      [AMDGPU_MN_TYPE_GFX] = { >>>>> -       .release = amdgpu_mn_release, >>>>> -       .invalidate_range_start = >>>>> amdgpu_mn_invalidate_range_start_gfx, >>>>> -       .invalidate_range_end = amdgpu_mn_invalidate_range_end, >>>>> +       .sync_cpu_device_pagetables = >>>>> +               amdgpu_mn_invalidate_range_start_gfx, >>>>> +       .release = amdgpu_hmm_mirror_release >>>>>      }, >>>>>      [AMDGPU_MN_TYPE_HSA] = { >>>>> -       .release = amdgpu_mn_release, >>>>> -       .invalidate_range_start = >>>>> amdgpu_mn_invalidate_range_start_hsa, >>>>> -       .invalidate_range_end = amdgpu_mn_invalidate_range_end, >>>>> +       .sync_cpu_device_pagetables = >>>>> +               amdgpu_mn_invalidate_range_start_hsa, >>>>> +       .release = amdgpu_hmm_mirror_release >>>>>      }, >>>>>  }; >>>>>  -/* Low bits of any reasonable mm pointer will be unused due to >>>>> struct >>>>> - * alignment. Use these bits to make a unique key from the mm >>>>> pointer >>>>> - * and notifier type. >>>>> - */ >>>>> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) >>>>> - >>>>>  /** >>>>> - * amdgpu_mn_get - create notifier context >>>>> + * amdgpu_mn_get - create HMM mirror context >>>>>   * >>>>>   * @adev: amdgpu device pointer >>>>>   * @type: type of MMU notifier context >>>>>   * >>>>> - * Creates a notifier context for current->mm. >>>>> + * Creates a HMM mirror context for current->mm. >>>>>   */ >>>>>  struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, >>>>>                  enum amdgpu_mn_type type) >>>>> @@ -408,12 +389,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct >>>>> amdgpu_device *adev, >>>>>      amn->mm = mm; >>>>>      init_rwsem(&amn->lock); >>>>>      amn->type = type; >>>>> -   amn->mn.ops = &amdgpu_mn_ops[type]; >>>>>      amn->objects = RB_ROOT_CACHED; >>>>>      mutex_init(&amn->read_lock); >>>>>      atomic_set(&amn->recursion, 0); >>>>>  -   r = __mmu_notifier_register(&amn->mn, mm); >>>>> +   amn->mirror.ops = &amdgpu_hmm_mirror_ops[type]; >>>>> +   r = hmm_mirror_register(&amn->mirror, mm); >>>>>      if (r) >>>>>          goto free_amn; >>>>>  @@ -439,7 +420,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct >>>>> amdgpu_device *adev, >>>>>   * @bo: amdgpu buffer object >>>>>   * @addr: userptr addr we should monitor >>>>>   * >>>>> - * Registers an MMU notifier for the given BO at the specified >>>>> address. >>>>> + * Registers an HMM mirror for the given BO at the specified >>>>> address. >>>>>   * Returns 0 on success, -ERRNO if anything goes wrong. 
>>>>>   */ >>>>>  int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) >>>>> @@ -495,11 +476,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, >>>>> unsigned long addr) >>>>>  } >>>>>   /** >>>>> - * amdgpu_mn_unregister - unregister a BO for notifier updates >>>>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates >>>>>   * >>>>>   * @bo: amdgpu buffer object >>>>>   * >>>>> - * Remove any registration of MMU notifier updates from the >>>>> buffer object. >>>>> + * Remove any registration of HMM mirror updates from the buffer >>>>> object. >>>>>   */ >>>>>  void amdgpu_mn_unregister(struct amdgpu_bo *bo) >>>>>  { >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h >>>>> index eb0f432..0e27526 100644 >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h >>>>> @@ -34,7 +34,7 @@ enum amdgpu_mn_type { >>>>>      AMDGPU_MN_TYPE_HSA, >>>>>  }; >>>>>  -#if defined(CONFIG_MMU_NOTIFIER) >>>>> +#if defined(CONFIG_HMM) >>>>>  void amdgpu_mn_lock(struct amdgpu_mn *mn); >>>>>  void amdgpu_mn_unlock(struct amdgpu_mn *mn); >>>>>  struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, >>>> _______________________________________________ >>>> amd-gfx mailing list >>>> amd-gfx at lists.freedesktop.org >>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx >>> >> >