Re: [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6

"Yang, Philip" <Philip.Yang@xxxxxxx> · Mon, 4 Feb 2019 17:17:46 +0000

On 2019-02-04 10:18 a.m., Christian König wrote:
> Am 04.02.19 um 16:06 schrieb Yang, Philip:
>> Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
>> DRM_AMDGPU_USERPTR Kconfig.
>>
>> It supports both KFD userptr and gfx userptr paths.
>>
>> The dependent HMM patchset from Jérôme Glisse is fully merged into the
>> 4.20.0 kernel now.
>>
>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>> Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
>>   drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 139 +++++++++++--------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
>>   4 files changed, 67 insertions(+), 82 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig 
>> b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> index 9221e5489069..960a63355705 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>>   config DRM_AMDGPU_USERPTR
>>       bool "Always enable userptr write support"
>>       depends on DRM_AMDGPU
>> -    select MMU_NOTIFIER
>> +    select HMM_MIRROR
>>       help
>> -      This option selects CONFIG_MMU_NOTIFIER if it isn't already
>> -      selected to enabled full userptr support.
>> +      This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
>> +      isn't already selected to enabled full userptr support.
>>   config DRM_AMDGPU_GART_DEBUGFS
>>       bool "Allow GART access through debugfs"
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index 466da5954a68..851001ced5e8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -172,7 +172,7 @@ endif
>>   amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>>   amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>>   amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>> +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
>>   include $(FULL_AMD_PATH)/powerplay/Makefile
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> index 3e6823fdd939..5d518d2bb9be 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> @@ -45,7 +45,7 @@
>>   #include <linux/firmware.h>
>>   #include <linux/module.h>
>> -#include <linux/mmu_notifier.h>
>> +#include <linux/hmm.h>
>>   #include <linux/interval_tree.h>
>>   #include <drm/drmP.h>
>>   #include <drm/drm.h>
>> @@ -58,7 +58,6 @@
>>    *
>>    * @adev: amdgpu device pointer
>>    * @mm: process address space
>> - * @mn: MMU notifier structure
>>    * @type: type of MMU notifier
>>    * @work: destruction work item
>>    * @node: hash table node to find structure by adev and mn
>> @@ -66,6 +65,7 @@
>>    * @objects: interval tree containing amdgpu_mn_nodes
>>    * @read_lock: mutex for recursive locking of @lock
>>    * @recursion: depth of recursion
>> + * @mirror: HMM mirror function support
>>    *
>>    * Data for each amdgpu device and process address space.
>>    */
>> @@ -73,7 +73,6 @@ struct amdgpu_mn {
>>       /* constant after initialisation */
>>       struct amdgpu_device    *adev;
>>       struct mm_struct    *mm;
>> -    struct mmu_notifier    mn;
>>       enum amdgpu_mn_type    type;
>>       /* only used on destruction */
>> @@ -87,6 +86,9 @@ struct amdgpu_mn {
>>       struct rb_root_cached    objects;
>>       struct mutex        read_lock;
> 
>>       atomic_t        recursion;
> 
> With HMM we don't need this any more. Please remove it and simplify 
> amdgpu_mn_read_lock() and amdgpu_mn_read_unlock().
> 
Thanks, this makes sense because HMM uses hmm->mirror_sem to serialize 
range invalidations.

amn->read_lock is also not needed anymore; it was only used to protect 
the atomic_inc_return(amn->recursion) and down_read(amn->lock) 
sequence.

Only amn->lock is needed to synchronize amdgpu_cs_submit with userptr 
updates. I will submit another patch for this.

Philip

> Apart from that looks good to me,
> Christian.
> 
>> +
>> +    /* HMM mirror */
>> +    struct hmm_mirror    mirror;
>>   };
>>   /**
>> @@ -103,7 +105,7 @@ struct amdgpu_mn_node {
>>   };
>>   /**
>> - * amdgpu_mn_destroy - destroy the MMU notifier
>> + * amdgpu_mn_destroy - destroy the HMM mirror
>>    *
>>    * @work: previously sheduled work item
>>    *
>> @@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct 
>> *work)
>>       }
>>       up_write(&amn->lock);
>>       mutex_unlock(&adev->mn_lock);
>> -    mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>> +
>> +    hmm_mirror_unregister(&amn->mirror);
>>       kfree(amn);
>>   }
>>   /**
>> - * amdgpu_mn_release - callback to notify about mm destruction
>> + * amdgpu_hmm_mirror_release - callback to notify about mm destruction
>>    *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> + * @mirror: the HMM mirror (mm) this callback is about
>>    *
>> - * Shedule a work item to lazy destroy our notifier.
>> + * Shedule a work item to lazy destroy HMM mirror.
>>    */
>> -static void amdgpu_mn_release(struct mmu_notifier *mn,
>> -                  struct mm_struct *mm)
>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>>   {
>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> +    struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, 
>> mirror);
>>       INIT_WORK(&amn->work, amdgpu_mn_destroy);
>>       schedule_work(&amn->work);
>>   }
>> -
>>   /**
>>    * amdgpu_mn_lock - take the write side lock for this notifier
>>    *
>> @@ -237,141 +237,126 @@ static void amdgpu_mn_invalidate_node(struct 
>> amdgpu_mn_node *node,
>>   /**
>>    * amdgpu_mn_invalidate_range_start_gfx - callback to notify about 
>> mm change
>>    *
>> - * @mn: our notifier
>> - * @range: mmu notifier context
>> + * @mirror: the hmm_mirror (mm) is about to update
>> + * @update: the update start, end address
>>    *
>>    * Block for operations on BOs to finish and mark pages as accessed and
>>    * potentially dirty.
>>    */
>> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>> -            const struct mmu_notifier_range *range)
>> +static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
>> +            const struct hmm_update *update)
>>   {
>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> +    struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, 
>> mirror);
>> +    unsigned long start = update->start;
>> +    unsigned long end = update->end;
>> +    bool blockable = update->blockable;
>>       struct interval_tree_node *it;
>> -    unsigned long end;
>>       /* notification is exclusive, but interval is inclusive */
>> -    end = range->end - 1;
>> +    end -= 1;
>>       /* TODO we should be able to split locking for interval tree and
>>        * amdgpu_mn_invalidate_node
>>        */
>> -    if (amdgpu_mn_read_lock(amn, range->blockable))
>> +    if (amdgpu_mn_read_lock(amn, blockable))
>>           return -EAGAIN;
>> -    it = interval_tree_iter_first(&amn->objects, range->start, end);
>> +    it = interval_tree_iter_first(&amn->objects, start, end);
>>       while (it) {
>>           struct amdgpu_mn_node *node;
>> -        if (!range->blockable) {
>> +        if (!blockable) {
>>               amdgpu_mn_read_unlock(amn);
>>               return -EAGAIN;
>>           }
>>           node = container_of(it, struct amdgpu_mn_node, it);
>> -        it = interval_tree_iter_next(it, range->start, end);
>> +        it = interval_tree_iter_next(it, start, end);
>> -        amdgpu_mn_invalidate_node(node, range->start, end);
>> +        amdgpu_mn_invalidate_node(node, start, end);
>>       }
>> +    amdgpu_mn_read_unlock(amn);
>> +
>>       return 0;
>>   }
>>   /**
>>    * amdgpu_mn_invalidate_range_start_hsa - callback to notify about 
>> mm change
>>    *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> - * @start: start of updated range
>> - * @end: end of updated range
>> + * @mirror: the hmm_mirror (mm) is about to update
>> + * @update: the update start, end address
>>    *
>>    * We temporarily evict all BOs between start and end. This
>>    * necessitates evicting all user-mode queues of the process. The BOs
>>    * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>>    */
>> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>> -            const struct mmu_notifier_range *range)
>> +static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
>> +            const struct hmm_update *update)
>>   {
>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> +    struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, 
>> mirror);
>> +    unsigned long start = update->start;
>> +    unsigned long end = update->end;
>> +    bool blockable = update->blockable;
>>       struct interval_tree_node *it;
>> -    unsigned long end;
>>       /* notification is exclusive, but interval is inclusive */
>> -    end = range->end - 1;
>> +    end -= 1;
>> -    if (amdgpu_mn_read_lock(amn, range->blockable))
>> +    if (amdgpu_mn_read_lock(amn, blockable))
>>           return -EAGAIN;
>> -    it = interval_tree_iter_first(&amn->objects, range->start, end);
>> +    it = interval_tree_iter_first(&amn->objects, start, end);
>>       while (it) {
>>           struct amdgpu_mn_node *node;
>>           struct amdgpu_bo *bo;
>> -        if (!range->blockable) {
>> +        if (!blockable) {
>>               amdgpu_mn_read_unlock(amn);
>>               return -EAGAIN;
>>           }
>>           node = container_of(it, struct amdgpu_mn_node, it);
>> -        it = interval_tree_iter_next(it, range->start, end);
>> +        it = interval_tree_iter_next(it, start, end);
>>           list_for_each_entry(bo, &node->bos, mn_list) {
>>               struct kgd_mem *mem = bo->kfd_bo;
>>               if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>> -                             range->start,
>> -                             end))
>> -                amdgpu_amdkfd_evict_userptr(mem, range->mm);
>> +                             start, end))
>> +                amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>>           }
>>       }
>> +    amdgpu_mn_read_unlock(amn);
>> +
>>       return 0;
>>   }
>> -/**
>> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
>> - *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> - * @start: start of updated range
>> - * @end: end of updated range
>> - *
>> - * Release the lock again to allow new command submissions.
>> +/* Low bits of any reasonable mm pointer will be unused due to struct
>> + * alignment. Use these bits to make a unique key from the mm pointer
>> + * and notifier type.
>>    */
>> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>> -            const struct mmu_notifier_range *range)
>> -{
>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> -
>> -    amdgpu_mn_read_unlock(amn);
>> -}
>> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>> -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>>       [AMDGPU_MN_TYPE_GFX] = {
>> -        .release = amdgpu_mn_release,
>> -        .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
>> -        .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>> +        .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
>> +        .release = amdgpu_hmm_mirror_release
>>       },
>>       [AMDGPU_MN_TYPE_HSA] = {
>> -        .release = amdgpu_mn_release,
>> -        .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
>> -        .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>> +        .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
>> +        .release = amdgpu_hmm_mirror_release
>>       },
>>   };
>> -/* Low bits of any reasonable mm pointer will be unused due to struct
>> - * alignment. Use these bits to make a unique key from the mm pointer
>> - * and notifier type.
>> - */
>> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>> -
>>   /**
>> - * amdgpu_mn_get - create notifier context
>> + * amdgpu_mn_get - create HMM mirror context
>>    *
>>    * @adev: amdgpu device pointer
>>    * @type: type of MMU notifier context
>>    *
>> - * Creates a notifier context for current->mm.
>> + * Creates a HMM mirror context for current->mm.
>>    */
>>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>                   enum amdgpu_mn_type type)
>> @@ -401,12 +386,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct 
>> amdgpu_device *adev,
>>       amn->mm = mm;
>>       init_rwsem(&amn->lock);
>>       amn->type = type;
>> -    amn->mn.ops = &amdgpu_mn_ops[type];
>>       amn->objects = RB_ROOT_CACHED;
>>       mutex_init(&amn->read_lock);
>>       atomic_set(&amn->recursion, 0);
>> -    r = __mmu_notifier_register(&amn->mn, mm);
>> +    amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
>> +    r = hmm_mirror_register(&amn->mirror, mm);
>>       if (r)
>>           goto free_amn;
>> @@ -432,7 +417,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct 
>> amdgpu_device *adev,
>>    * @bo: amdgpu buffer object
>>    * @addr: userptr addr we should monitor
>>    *
>> - * Registers an MMU notifier for the given BO at the specified address.
>> + * Registers an HMM mirror for the given BO at the specified address.
>>    * Returns 0 on success, -ERRNO if anything goes wrong.
>>    */
>>   int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>> @@ -488,11 +473,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, 
>> unsigned long addr)
>>   }
>>   /**
>> - * amdgpu_mn_unregister - unregister a BO for notifier updates
>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>>    *
>>    * @bo: amdgpu buffer object
>>    *
>> - * Remove any registration of MMU notifier updates from the buffer 
>> object.
>> + * Remove any registration of HMM mirror updates from the buffer object.
>>    */
>>   void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>   {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> index eb0f432f78fe..0a51fd00021c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>>       AMDGPU_MN_TYPE_HSA,
>>   };
>> -#if defined(CONFIG_MMU_NOTIFIER)
>> +#if defined(CONFIG_HMM_MIRROR)
>>   void amdgpu_mn_lock(struct amdgpu_mn *mn);
>>   void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx