This fence is used by KFD to keep memory resident while user mode queues are enabled. Trying to evict memory will trigger the enable_signaling callback, which starts a KFD eviction, which involves preempting user mode queues before signaling the fence. There is one such fence per process. Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> --- drivers/gpu/drm/amd/amdgpu/Makefile | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 15 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 196 +++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 18 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 18 +++ drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 6 + 7 files changed, 256 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index d6e5b72..43dc3f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -130,6 +130,7 @@ amdgpu-y += \ # add amdkfd interfaces amdgpu-y += \ amdgpu_amdkfd.o \ + amdgpu_amdkfd_fence.o \ amdgpu_amdkfd_gfx_v8.o # add cgs diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 2a519f9..8d92f5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -29,6 +29,8 @@ #include <linux/mmu_context.h> #include <kgd_kfd_interface.h> +extern const struct kgd2kfd_calls *kgd2kfd; + struct amdgpu_device; struct kgd_mem { @@ -37,6 +39,19 @@ struct kgd_mem { void *cpu_ptr; }; +/* KFD Memory Eviction */ +struct amdgpu_amdkfd_fence { + struct dma_fence base; + void *mm; + spinlock_t lock; + char timeline_name[TASK_COMM_LEN]; +}; + +struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, + void *mm); +bool amd_kfd_fence_check_mm(struct dma_fence *f, void *mm); +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); + int amdgpu_amdkfd_init(void); void amdgpu_amdkfd_fini(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c new file mode 100644 index 0000000..252e44e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -0,0 +1,196 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/dma-fence.h> +#include <linux/spinlock.h> +#include <linux/atomic.h> +#include <linux/stacktrace.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include "amdgpu_amdkfd.h" + +const struct dma_fence_ops amd_kfd_fence_ops; +static atomic_t fence_seq = ATOMIC_INIT(0); + +static int amd_kfd_fence_signal(struct dma_fence *f); + +/* Eviction Fence + * Fence helper functions to deal with KFD memory eviction. + * Big Idea - Since KFD submissions are done by user queues, a BO cannot be + * evicted unless all the user queues for that process are evicted. + * + * All the BOs in a process share an eviction fence. When process X wants + * to map VRAM memory but TTM can't find enough space, TTM will attempt to + * evict BOs from its LRU list. TTM checks if the BO is valuable to evict + * by calling ttm_bo_driver->eviction_valuable(). + * + * ttm_bo_driver->eviction_valuable() - will return false if the BO belongs + * to process X. Otherwise, it will return true to indicate BO can be + * evicted by TTM. + * + * If ttm_bo_driver->eviction_valuable returns true, then TTM will continue + * the evcition process for that BO by calling ttm_bo_evict --> amdgpu_bo_move + * --> amdgpu_copy_buffer(). This sets up job in GPU scheduler. + * + * GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to + * nofity when the BO is free to move. fence_add_callback --> enable_signaling + * --> amdgpu_amdkfd_fence.enable_signaling + * + * amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce + * user queues and signal fence. The work item will also start another delayed + * work item to restore BOs + */ + +struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, + void *mm) +{ + struct amdgpu_amdkfd_fence *fence = NULL; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (fence == NULL) + return NULL; + + /* mm_struct mm is used as void pointer to identify the parent + * KFD process. Don't dereference it. Fence and any threads using + * mm is guranteed to be released before process termination. + */ + fence->mm = mm; + get_task_comm(fence->timeline_name, current); + spin_lock_init(&fence->lock); + + dma_fence_init(&fence->base, &amd_kfd_fence_ops, &fence->lock, + context, atomic_inc_return(&fence_seq)); + + return fence; +} + +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence; + + if (!f) + return NULL; + + fence = container_of(f, struct amdgpu_amdkfd_fence, base); + if (fence && f->ops == &amd_kfd_fence_ops) + return fence; + + return NULL; +} + +static const char *amd_kfd_fence_get_driver_name(struct dma_fence *f) +{ + return "amdgpu_amdkfd_fence"; +} + +static const char *amd_kfd_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + return fence->timeline_name; +} + +/** + * amd_kfd_fence_enable_signaling - This gets called when TTM wants to evict + * a KFD BO and schedules a job to move the BO. + * If fence is already signaled return true. + * If fence is not signaled schedule a evict KFD process work item. + */ +static bool amd_kfd_fence_enable_signaling(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + if (!fence) + return false; + + if (dma_fence_is_signaled(f)) + return true; + + if (!kgd2kfd->schedule_evict_and_restore_process( + (struct mm_struct *)fence->mm, f)) + return true; + + return false; +} + +static int amd_kfd_fence_signal(struct dma_fence *f) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(f->lock, flags); + /* Set enabled bit so cb will called */ + set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &f->flags); + ret = dma_fence_signal_locked(f); + spin_unlock_irqrestore(f->lock, flags); + + return ret; +} + +/** + * amd_kfd_fence_release - callback that fence can be freed + * + * @fence: fence + * + * This function is called when the reference count becomes zero. + * It just RCU schedules freeing up the fence. + */ +static void amd_kfd_fence_release(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + /* Unconditionally signal the fence. The process is getting + * terminated. + */ + if (WARN_ON(!fence)) + return; /* Not an amdgpu_amdkfd_fence */ + + amd_kfd_fence_signal(f); + kfree_rcu(f, rcu); +} + +/** + * amd_kfd_fence_check_mm - Check if @mm is same as that of the fence @f + * if same return TRUE else return FALSE. + * + * @f: [IN] fence + * @mm: [IN] mm that needs to be verified + */ +bool amd_kfd_fence_check_mm(struct dma_fence *f, void *mm) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + if (!fence) + return false; + else if (fence->mm == mm) + return true; + + return false; +} + +const struct dma_fence_ops amd_kfd_fence_ops = { + .get_driver_name = amd_kfd_fence_get_driver_name, + .get_timeline_name = amd_kfd_fence_get_timeline_name, + .enable_signaling = amd_kfd_fence_enable_signaling, + .signaled = NULL, + .wait = dma_fence_default_wait, + .release = amd_kfd_fence_release, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 65d5a4e..ca00dd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -36,8 +36,9 @@ #define AMDGPU_MAX_UVD_ENC_RINGS 2 /* some special values for the owner field */ -#define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul) -#define AMDGPU_FENCE_OWNER_VM ((void*)1ul) +#define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul) +#define AMDGPU_FENCE_OWNER_VM ((void *)1ul) +#define AMDGPU_FENCE_OWNER_KFD ((void *)2ul) #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) #define AMDGPU_FENCE_FLAG_INT (1 << 1) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index df65c66..0cb31d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -31,6 +31,7 @@ #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" struct amdgpu_sync_entry { struct hlist_node node; @@ -86,10 +87,18 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, static void *amdgpu_sync_get_owner(struct dma_fence *f) { struct drm_sched_fence *s_fence = to_drm_sched_fence(f); + struct amdgpu_amdkfd_fence *kfd_fence; + + if (!f) + return AMDGPU_FENCE_OWNER_UNDEFINED; if (s_fence) return s_fence->owner; + kfd_fence = to_amdgpu_amdkfd_fence(f); + if (kfd_fence) + return AMDGPU_FENCE_OWNER_KFD; + return AMDGPU_FENCE_OWNER_UNDEFINED; } @@ -204,11 +213,18 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, for (i = 0; i < flist->shared_count; ++i) { f = rcu_dereference_protected(flist->shared[i], reservation_object_held(resv)); + /* We only want to trigger KFD eviction fences on + * evict or move jobs. Skip KFD fences otherwise. + */ + fence_owner = amdgpu_sync_get_owner(f); + if (fence_owner == AMDGPU_FENCE_OWNER_KFD && + owner != AMDGPU_FENCE_OWNER_UNDEFINED) + continue; + if (amdgpu_sync_same_dev(adev, f)) { /* VM updates are only interesting * for other VM updates and moves. */ - fence_owner = amdgpu_sync_get_owner(f); if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) && (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) && ((owner == AMDGPU_FENCE_OWNER_VM) != diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e4bb435..c3f33d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -46,6 +46,7 @@ #include "amdgpu.h" #include "amdgpu_object.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" #include "bif/bif_4_1_d.h" #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) @@ -1170,6 +1171,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, { unsigned long num_pages = bo->mem.num_pages; struct drm_mm_node *node = bo->mem.mm_node; + struct reservation_object_list *flist; + struct dma_fence *f; + int i; + + /* If bo is a KFD BO, check if the bo belongs to the current process. + * If true, then return false as any KFD process needs all its BOs to + * be resident to run successfully + */ + flist = reservation_object_get_list(bo->resv); + if (flist) { + for (i = 0; i < flist->shared_count; ++i) { + f = rcu_dereference_protected(flist->shared[i], + reservation_object_held(bo->resv)); + if (amd_kfd_fence_check_mm(f, current->mm)) + return false; + } + } switch (bo->mem.mem_type) { case TTM_PL_TT: diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 94eab54..9e35249 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -30,6 +30,7 @@ #include <linux/types.h> #include <linux/bitmap.h> +#include <linux/dma-fence.h> struct pci_dev; @@ -286,6 +287,9 @@ struct kfd2kgd_calls { * * @resume: Notifies amdkfd about a resume action done to a kgd device * + * @schedule_evict_and_restore_process: Schedules work queue that will prepare + * for safe eviction of KFD BOs that belong to the specified process. + * * This structure contains function callback pointers so the kgd driver * will notify to the amdkfd about certain status changes. * @@ -300,6 +304,8 @@ struct kgd2kfd_calls { void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry); void (*suspend)(struct kfd_dev *kfd); int (*resume)(struct kfd_dev *kfd); + int (*schedule_evict_and_restore_process)(struct mm_struct *mm, + struct dma_fence *fence); }; int kgd2kfd_init(unsigned interface_version, -- 2.7.4