On 2018-09-13 02:24 PM, Christian König wrote:
> On 13.09.2018 at 20:00, Philip Yang wrote:
>> Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
>> DRM_AMDGPU_USERPTR Kconfig.
>>
>> It supports both KFD userptr and gfx userptr paths.
>>
>> This depends on several HMM patchsets from Jérôme Glisse queued for
>> upstream. See
>> http://172.27.226.38/root/kernel_amd/commits/hmm-dev-v01 (for AMD
>> intranet)
>>
>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/Kconfig     | 6 +--
>>  drivers/gpu/drm/amd/amdgpu/Makefile    | 2 +-
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 88 +++++++++++++++++++++++++++-------
>>  3 files changed, 75 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> index 9221e54..960a633 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>>  config DRM_AMDGPU_USERPTR
>>      bool "Always enable userptr write support"
>>      depends on DRM_AMDGPU
>> -   select MMU_NOTIFIER
>> +   select HMM_MIRROR
>>      help
>> -     This option selects CONFIG_MMU_NOTIFIER if it isn't already
>> -     selected to enabled full userptr support.
>> +     This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
>> +     isn't already selected to enabled full userptr support.
>>
>>  config DRM_AMDGPU_GART_DEBUGFS
>>      bool "Allow GART access through debugfs"
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index 138cb78..c1e5d43 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -171,7 +171,7 @@ endif
>>  amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>>  amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>>  amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>> +amdgpu-$(CONFIG_HMM) += amdgpu_mn.o
>>
>>  include $(FULL_AMD_PATH)/powerplay/Makefile
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> index e55508b..ea8671f6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> @@ -46,6 +46,7 @@
>>  #include <linux/firmware.h>
>>  #include <linux/module.h>
>>  #include <linux/mmu_notifier.h>
>> +#include <linux/hmm.h>
>
> Can we now drop including linux/mmu_notifier.h?

Yes, will use hmm_mirror_ops to replace the gfx and kfd mmu_notifier_ops.

>>  #include <linux/interval_tree.h>
>>  #include <drm/drmP.h>
>>  #include <drm/drm.h>
>> @@ -66,6 +67,7 @@
>>   * @objects: interval tree containing amdgpu_mn_nodes
>>   * @read_lock: mutex for recursive locking of @lock
>>   * @recursion: depth of recursion
>> + * @mirror: HMM mirror function support
>>   *
>>   * Data for each amdgpu device and process address space.
>>   */
>> @@ -87,6 +89,9 @@ struct amdgpu_mn {
>>      struct rb_root_cached   objects;
>>      struct mutex       read_lock;
>>      atomic_t       recursion;
>> +
>> +   /* HMM mirror */
>> +   struct hmm_mirror   mirror;
>>  };
>>
>>  /**
>> @@ -103,7 +108,7 @@ struct amdgpu_mn_node {
>>  };
>>
>>  /**
>> - * amdgpu_mn_destroy - destroy the MMU notifier
>> + * amdgpu_mn_destroy - destroy the HMM mirror
>>   *
>>   * @work: previously sheduled work item
>>   *
>> @@ -129,28 +134,27 @@ static void amdgpu_mn_destroy(struct work_struct *work)
>>      }
>>      up_write(&amn->lock);
>>      mutex_unlock(&adev->mn_lock);
>> -   mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>> +
>> +   hmm_mirror_unregister(&amn->mirror);
>> +
>>      kfree(amn);
>>  }
>>
>>  /**
>>   * amdgpu_mn_release - callback to notify about mm destruction
>>   *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> + * @mirror: the HMM mirror (mm) this callback is about
>>   *
>> - * Shedule a work item to lazy destroy our notifier.
>> + * Shedule a work item to lazy destroy HMM mirror.
>>   */
>> -static void amdgpu_mn_release(struct mmu_notifier *mn,
>> -                 struct mm_struct *mm)
>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>>  {
>> -   struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> +   struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
>>
>>      INIT_WORK(&amn->work, amdgpu_mn_destroy);
>>      schedule_work(&amn->work);
>>  }
>>
>> -
>>  /**
>>   * amdgpu_mn_lock - take the write side lock for this notifier
>>   *
>> @@ -355,12 +359,10 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>>
>>  static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>>      [AMDGPU_MN_TYPE_GFX] = {
>> -       .release = amdgpu_mn_release,
>>          .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
>>          .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>      },
>>      [AMDGPU_MN_TYPE_HSA] = {
>> -       .release = amdgpu_mn_release,
>>          .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
>>          .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>      },
>> @@ -373,12 +375,63 @@ static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>>  #define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>
>>  /**
>> - * amdgpu_mn_get - create notifier context
>> + * amdgpu_hmm_sync_cpu_device_pagetables - synchronize CPU/GPU page tables
>> + *
>> + * @mirror: the hmm_mirror (mm) is about to update
>> + * @update: the update start, end address
>> + *
>> + * This callback is called from mmu_notifiers when the CPU page table is
>> + * updated.
>> + */
>> +static int amdgpu_hmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
>> +           const struct hmm_update *update)
>> +{
>> +   struct amdgpu_mn *amn;
>> +   struct hmm *hmm;
>> +   struct mm_struct *mm;
>> +   unsigned long start;
>> +   unsigned long end;
>> +   unsigned long key;
>> +   int r = 0;
>> +
>> +   start = update->start;
>> +   end = update->end;
>> +   hmm = mirror->hmm;
>> +   mm = *(struct mm_struct **)hmm;
>> +
>> +   pr_debug("mirror %p start %lx end %lx\n", mirror, start, end);
>
> You should probably remove those pr_debug lines before upstreaming.
>
> Alternatively we could turn them into trace points.
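
Turning it into a trace point could look roughly like this (just a sketch, not
part of this patch; the event name is made up and it assumes the event sits
next to the existing ones in amdgpu_trace.h):

/* Sketch: trace point to replace the pr_debug; event name is made up */
TRACE_EVENT(amdgpu_mn_sync_pagetables,
	    TP_PROTO(unsigned long start, unsigned long end),
	    TP_ARGS(start, end),
	    TP_STRUCT__entry(
			     __field(unsigned long, start)
			     __field(unsigned long, end)
			     ),
	    TP_fast_assign(
			   __entry->start = start;
			   __entry->end = end;
			   ),
	    TP_printk("start=%lx end=%lx", __entry->start, __entry->end)
);

The pr_debug() in amdgpu_hmm_sync_cpu_device_pagetables() would then become
trace_amdgpu_mn_sync_pagetables(update->start, update->end).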
>
>> +
>> +   amn = container_of(mirror, struct amdgpu_mn, mirror);
>> +   key = AMDGPU_MN_KEY(mm, amn->type);
>> +
>> +   hash_for_each_possible(amn->adev->mn_hash, amn, node, key)
>> +       if (AMDGPU_MN_KEY(amn->mm, amn->type) == key) {
>> +           r = amn->mn.ops->invalidate_range_start(&amn->mn, mm,
>> +                           start, end,
>> +                           update->blockable);
>> +           amn->mn.ops->invalidate_range_end(&amn->mn, mm,
>> +                           start, end);
>> +           if (r) {
>> +               DRM_ERROR("Failed to invalidate %lx\n", start);
>> +               break;
>> +           }
>> +       }
>
> That looks fishy to me, why is this necessary?

Yeah, it is unnecessary. Each process (mm) registers one HMM mirror amn per
adev, so each mm gets its own callback; there is no need to use mm to look up
amn from adev->mn_hash.

>> +
>> +   return r;
>> +}
>> +
>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops = {
>> +   .sync_cpu_device_pagetables = amdgpu_hmm_sync_cpu_device_pagetables,
>> +   .release = amdgpu_hmm_mirror_release
>> +};
>
> You should probably clean that up as well and have a separate one for
> each AMDGPU_MN_TYPE_*.

Done, will submit v4. A rough sketch of that is at the end of this mail.

> Christian.
>
>> +
>> +/**
>> + * amdgpu_mn_get - create HMM mirror context
>>   *
>>   * @adev: amdgpu device pointer
>>   * @type: type of MMU notifier context
>>   *
>> - * Creates a notifier context for current->mm.
>> + * Creates a HMM mirror context for current->mm.
>>   */
>>  struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>                  enum amdgpu_mn_type type)
>> @@ -413,7 +466,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>      mutex_init(&amn->read_lock);
>>      atomic_set(&amn->recursion, 0);
>>
>> -   r = __mmu_notifier_register(&amn->mn, mm);
>> +   amn->mirror.ops = &amdgpu_hmm_mirror_ops;
>> +   r = hmm_mirror_register(&amn->mirror, mm);
>>      if (r)
>>          goto free_amn;
>>
>> @@ -439,7 +493,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>   * @bo: amdgpu buffer object
>>   * @addr: userptr addr we should monitor
>>   *
>> - * Registers an MMU notifier for the given BO at the specified address.
>> + * Registers an HMM mirror for the given BO at the specified address.
>>   * Returns 0 on success, -ERRNO if anything goes wrong.
>>   */
>>  int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>> @@ -495,11 +549,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>  }
>>
>>  /**
>> - * amdgpu_mn_unregister - unregister a BO for notifier updates
>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>>   *
>>   * @bo: amdgpu buffer object
>>   *
>> - * Remove any registration of MMU notifier updates from the buffer object.
>> + * Remove any registration of HMM mirror updates from the buffer object.
>>   */
>>  void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>  {
>
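
For reference, a rough sketch of the v4 direction discussed above (untested;
the per-type callback names are made up here, and it simply reuses the
existing amdgpu_mn_invalidate_range_start_*/_end helpers with their current
signatures):

/* Sketch only, not the actual v4 patch: one hmm_mirror_ops per
 * AMDGPU_MN_TYPE_* and no adev->mn_hash walk in the sync callback,
 * because the mirror already identifies the right amdgpu_mn.
 */
static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
					 const struct hmm_update *update)
{
	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
	int r;

	/* Invalidate the range directly for this amn, no hash lookup */
	r = amdgpu_mn_invalidate_range_start_gfx(&amn->mn, amn->mm,
						 update->start, update->end,
						 update->blockable);
	amdgpu_mn_invalidate_range_end(&amn->mn, amn->mm,
				       update->start, update->end);
	return r;
}

static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
					 const struct hmm_update *update)
{
	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
	int r;

	r = amdgpu_mn_invalidate_range_start_hsa(&amn->mn, amn->mm,
						 update->start, update->end,
						 update->blockable);
	amdgpu_mn_invalidate_range_end(&amn->mn, amn->mm,
				       update->start, update->end);
	return r;
}

/* Separate ops per type, mirroring the existing amdgpu_mn_ops[] layout */
static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
	[AMDGPU_MN_TYPE_GFX] = {
		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
		.release = amdgpu_hmm_mirror_release,
	},
	[AMDGPU_MN_TYPE_HSA] = {
		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
		.release = amdgpu_hmm_mirror_release,
	},
};

amdgpu_mn_get() would then pick the ops by type:

	amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
	r = hmm_mirror_register(&amn->mirror, mm);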