On 2018-09-13 04:52 PM, Philip Yang wrote:
> Replace our MMU notifier with the hmm_mirror_ops.sync_cpu_device_pagetables
> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as dependencies in the
> DRM_AMDGPU_USERPTR Kconfig option.
>
> It supports both the KFD userptr and gfx userptr paths.
>
> This depends on several HMM patchsets from Jérôme Glisse that are queued
> for upstream.
>
> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
>  drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 121 ++++++++++++++-------------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
>  4 files changed, 56 insertions(+), 75 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
> index 9221e54..960a633 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>  config DRM_AMDGPU_USERPTR
>  	bool "Always enable userptr write support"
>  	depends on DRM_AMDGPU
> -	select MMU_NOTIFIER
> +	select HMM_MIRROR
>  	help
> -	  This option selects CONFIG_MMU_NOTIFIER if it isn't already
> -	  selected to enabled full userptr support.
> +	  This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if they
> +	  aren't already selected, to enable full userptr support.
>
>  config DRM_AMDGPU_GART_DEBUGFS
>  	bool "Allow GART access through debugfs"
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 138cb78..c1e5d43 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -171,7 +171,7 @@ endif
>  amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>  amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>  amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
> +amdgpu-$(CONFIG_HMM) += amdgpu_mn.o
>
>  include $(FULL_AMD_PATH)/powerplay/Makefile
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index e55508b..ad52f34 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -45,7 +45,7 @@
>
>  #include <linux/firmware.h>
>  #include <linux/module.h>
> -#include <linux/mmu_notifier.h>
> +#include <linux/hmm.h>
>  #include <linux/interval_tree.h>
>  #include <drm/drmP.h>
>  #include <drm/drm.h>
> @@ -66,6 +66,7 @@
>   * @objects: interval tree containing amdgpu_mn_nodes
>   * @read_lock: mutex for recursive locking of @lock
>   * @recursion: depth of recursion
> + * @mirror: HMM mirror function support
>   *
>   * Data for each amdgpu device and process address space.
>   */

The @mn documentation a few lines above needs to be removed as well, since
the struct member goes away below.
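I.e. something like this on top (just a sketch; the exact wording of the
@mn line is from memory, not from your patch, so double-check against the
file):

   * @mm: process address space
 - * @mn: MMU notifier structure
   * @type: type of MMU notifier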
> @@ -73,7 +74,6 @@ struct amdgpu_mn {
>  	/* constant after initialisation */
>  	struct amdgpu_device	*adev;
>  	struct mm_struct	*mm;
> -	struct mmu_notifier	mn;
>  	enum amdgpu_mn_type	type;
>
>  	/* only used on destruction */
> @@ -87,6 +87,9 @@ struct amdgpu_mn {
>  	struct rb_root_cached	objects;
>  	struct mutex		read_lock;
>  	atomic_t		recursion;
> +
> +	/* HMM mirror */
> +	struct hmm_mirror	mirror;
>  };
>
>  /**
> @@ -103,7 +106,7 @@ struct amdgpu_mn_node {
>  };
>
>  /**
> - * amdgpu_mn_destroy - destroy the MMU notifier
> + * amdgpu_mn_destroy - destroy the HMM mirror
>   *
>   * @work: previously sheduled work item
>   *
> @@ -129,28 +132,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
>  	}
>  	up_write(&amn->lock);
>  	mutex_unlock(&adev->mn_lock);
> -	mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
> +	hmm_mirror_unregister(&amn->mirror);
> +
>  	kfree(amn);
>  }
>
>  /**
>   * amdgpu_mn_release - callback to notify about mm destruction

Update the function name in the comment to match the rename below.

>   *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> + * @mirror: the HMM mirror (mm) this callback is about
>   *
> - * Shedule a work item to lazy destroy our notifier.
> + * Schedule a work item to lazily destroy the HMM mirror.
>   */
> -static void amdgpu_mn_release(struct mmu_notifier *mn,
> -			      struct mm_struct *mm)
> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>  {
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
>
>  	INIT_WORK(&amn->work, amdgpu_mn_destroy);
>  	schedule_work(&amn->work);
>  }
>
> -
>  /**
>   * amdgpu_mn_lock - take the write side lock for this notifier
>   *
> @@ -237,21 +238,19 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
>  /**
>   * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
>   *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> + * @mirror: the hmm_mirror (mm) that is about to update
> + * @update: the update's start and end addresses
>   *
>   * Block for operations on BOs to finish and mark pages as accessed and
>   * potentially dirty.
>   */
> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
> -						struct mm_struct *mm,
> -						unsigned long start,
> -						unsigned long end,
> -						bool blockable)
> +static int amdgpu_mn_invalidate_range_start_gfx(struct hmm_mirror *mirror,
> +						const struct hmm_update *update)
>  {
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
> +	unsigned long start = update->start;
> +	unsigned long end = update->end;
> +	bool blockable = update->blockable;
>  	struct interval_tree_node *it;
>
>  	/* notification is exclusive, but interval is inclusive */
> @@ -278,28 +277,28 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>  		amdgpu_mn_invalidate_node(node, start, end);
>  	}
>
> +	amdgpu_mn_read_unlock(amn);
> +

amdgpu_mn_read_lock/unlock support recursive locking for multiple
overlapping or nested invalidation ranges. But if you're locking and
unlocking in the same function, is that still a concern?
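If it isn't, the read_lock mutex and the recursion counter could presumably
go away too, and the callbacks could take the rwsem directly. A minimal
sketch of what I mean, assuming nothing else still takes amn->lock
recursively (untested):

static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
{
	/* No recursion counter needed: lock and unlock are now paired
	 * within a single sync_cpu_device_pagetables callback.
	 */
	if (blockable)
		down_read(&amn->lock);
	else if (!down_read_trylock(&amn->lock))
		return -EAGAIN;

	return 0;
}

static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
{
	up_read(&amn->lock);
}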
>  	return 0;
>  }
>
>  /**
>   * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
>   *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> + * @mirror: the hmm_mirror (mm) that is about to update
> + * @update: the update's start and end addresses
>   *
>   * We temporarily evict all BOs between start and end. This
>   * necessitates evicting all user-mode queues of the process. The BOs
>   * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>   */
> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
> -						struct mm_struct *mm,
> -						unsigned long start,
> -						unsigned long end,
> -						bool blockable)
> +static int amdgpu_mn_invalidate_range_start_hsa(struct hmm_mirror *mirror,
> +						const struct hmm_update *update)
>  {
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
> +	unsigned long start = update->start;
> +	unsigned long end = update->end;
> +	bool blockable = update->blockable;
>  	struct interval_tree_node *it;
>
>  	/* notification is exclusive, but interval is inclusive */
> @@ -326,59 +325,41 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>
>  			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>  							 start, end))
> -				amdgpu_amdkfd_evict_userptr(mem, mm);
> +				amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>  		}
>  	}
>
> +	amdgpu_mn_read_unlock(amn);
> +
>  	return 0;
>  }
>
> -/**
> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
> - *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> - *
> - * Release the lock again to allow new command submissions.
> +/* Low bits of any reasonable mm pointer will be unused due to struct
> + * alignment. Use these bits to make a unique key from the mm pointer
> + * and notifier type.
>   */
> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
> -					   struct mm_struct *mm,
> -					   unsigned long start,
> -					   unsigned long end)
> -{
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> -
> -	amdgpu_mn_read_unlock(amn);
> -}
> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>
> -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>  	[AMDGPU_MN_TYPE_GFX] = {
> -		.release = amdgpu_mn_release,
> -		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
> -		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
> +		.sync_cpu_device_pagetables =
> +			amdgpu_mn_invalidate_range_start_gfx,
> +		.release = amdgpu_hmm_mirror_release
>  	},
>  	[AMDGPU_MN_TYPE_HSA] = {
> -		.release = amdgpu_mn_release,
> -		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
> -		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
> +		.sync_cpu_device_pagetables =
> +			amdgpu_mn_invalidate_range_start_hsa,
> +		.release = amdgpu_hmm_mirror_release
>  	},
>  };
>
> -/* Low bits of any reasonable mm pointer will be unused due to struct
> - * alignment. Use these bits to make a unique key from the mm pointer
> - * and notifier type.
> - */
> -#define AMDGPU_MN_KEY(mm, type)	((unsigned long)(mm) + (type))
> -
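Side note for anyone reading this without Jérôme's patches applied: as far
as I can tell from the callers in this patch, the mirror API being coded
against looks roughly like the following. This is reconstructed from usage
here, not copied from the actual HMM headers, so the real structs may well
carry more members:

struct hmm_update {
	unsigned long start;	/* start of the invalidated range */
	unsigned long end;	/* end of the range (exclusive) */
	bool blockable;		/* whether the callback may sleep */
};

struct hmm_mirror_ops {
	/* called when the mirrored mm is being torn down */
	void (*release)(struct hmm_mirror *mirror);
	/* called when CPU page tables change and the device mappings
	 * for the given range must be invalidated */
	int (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror,
					  const struct hmm_update *update);
};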
>  /**
> - * amdgpu_mn_get - create notifier context
> + * amdgpu_mn_get - create HMM mirror context
>   *
>   * @adev: amdgpu device pointer
>   * @type: type of MMU notifier context
>   *
> - * Creates a notifier context for current->mm.
> + * Creates a HMM mirror context for current->mm.
>   */
>  struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>  				enum amdgpu_mn_type type)
> @@ -408,12 +389,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>  	amn->mm = mm;
>  	init_rwsem(&amn->lock);
>  	amn->type = type;
> -	amn->mn.ops = &amdgpu_mn_ops[type];
>  	amn->objects = RB_ROOT_CACHED;
>  	mutex_init(&amn->read_lock);
>  	atomic_set(&amn->recursion, 0);
>
> -	r = __mmu_notifier_register(&amn->mn, mm);
> +	amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
> +	r = hmm_mirror_register(&amn->mirror, mm);
>  	if (r)
>  		goto free_amn;
>
> @@ -439,7 +420,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>   * @bo: amdgpu buffer object
>   * @addr: userptr addr we should monitor
>   *
> - * Registers an MMU notifier for the given BO at the specified address.
> + * Registers an HMM mirror for the given BO at the specified address.
>   * Returns 0 on success, -ERRNO if anything goes wrong.
>   */
>  int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
> @@ -495,11 +476,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>  }
>
>  /**
> - * amdgpu_mn_unregister - unregister a BO for notifier updates
> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>   *
>   * @bo: amdgpu buffer object
>   *
> - * Remove any registration of MMU notifier updates from the buffer object.
> + * Remove any registration of HMM mirror updates from the buffer object.
>   */
>  void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>  {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> index eb0f432..0e27526 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>  	AMDGPU_MN_TYPE_HSA,
>  };
>
> -#if defined(CONFIG_MMU_NOTIFIER)
> +#if defined(CONFIG_HMM)
>  void amdgpu_mn_lock(struct amdgpu_mn *mn);
>  void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>  struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,