Change SVM range mapping flags or access attributes don't trigger
migration, if range is already mapped on GPUs we should update GPU
mapping and pass flush_tlb flag true to amdgpu vm.
Change SVM range preferred_loc or migration granularity don't need
update GPU mapping, skip the validate_and_map.
Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
---
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 46 +++++++++++++++++++---------
1 file changed, 32 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 8a077cd066a1..e740384df9c7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -686,7 +686,8 @@ svm_range_check_attr(struct kfd_process *p,
static void
svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
- uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
+ bool *update_mapping)
{
uint32_t i;
int gpuidx;
@@ -702,6 +703,7 @@ svm_range_apply_attrs(struct kfd_process *p,
struct svm_range *prange,
case KFD_IOCTL_SVM_ATTR_ACCESS:
case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+ *update_mapping = true;
gpuidx = kfd_process_gpuidx_from_gpuid(p,
attrs[i].value);
if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
@@ -716,9 +718,11 @@ svm_range_apply_attrs(struct kfd_process *p,
struct svm_range *prange,
}
break;
case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+ *update_mapping = true;
prange->flags |= attrs[i].value;
break;
case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+ *update_mapping = true;
prange->flags &= ~attrs[i].value;
break;
case KFD_IOCTL_SVM_ATTR_GRANULARITY:
@@ -1254,7 +1258,7 @@ static int
svm_range_map_to_gpu(struct kfd_process_device *pdd, struct
svm_range *prange,
unsigned long offset, unsigned long npages, bool
readonly,
dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
- struct dma_fence **fence)
+ struct dma_fence **fence, bool flush_tlb)
{
struct amdgpu_device *adev = pdd->dev->adev;
struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
@@ -1292,7 +1296,7 @@ svm_range_map_to_gpu(struct kfd_process_device
*pdd, struct svm_range *prange,
(last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
pte_flags);
- r = amdgpu_vm_update_range(adev, vm, false, false, false,
NULL,
+ r = amdgpu_vm_update_range(adev, vm, false, false,
flush_tlb, NULL,
last_start, prange->start + i,
pte_flags,
last_start - prange->start,
@@ -1326,7 +1330,7 @@ svm_range_map_to_gpu(struct kfd_process_device
*pdd, struct svm_range *prange,
static int
svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
unsigned long npages, bool readonly,
- unsigned long *bitmap, bool wait)
+ unsigned long *bitmap, bool wait, bool flush_tlb)
{
struct kfd_process_device *pdd;
struct amdgpu_device *bo_adev;
@@ -1361,7 +1365,8 @@ svm_range_map_to_gpus(struct svm_range *prange,
unsigned long offset,
r = svm_range_map_to_gpu(pdd, prange, offset, npages,
readonly,
prange->dma_addr[gpuidx],
- bo_adev, wait ? &fence : NULL);
+ bo_adev, wait ? &fence : NULL,
+ flush_tlb);
if (r)
break;
@@ -1482,8 +1487,8 @@ static void *kfd_svm_page_owner(struct
kfd_process *p, int32_t gpuidx)
* 5. Release page table (and SVM BO) reservation
*/
static int svm_range_validate_and_map(struct mm_struct *mm,
- struct svm_range *prange,
- int32_t gpuidx, bool intr, bool wait)
+ struct svm_range *prange, int32_t gpuidx,
+ bool intr, bool wait, bool flush_tlb)
{
struct svm_validate_context ctx;
unsigned long start, end, addr;
@@ -1522,8 +1527,12 @@ static int svm_range_validate_and_map(struct
mm_struct *mm,
prange->bitmap_aip, MAX_GPU_INSTANCE);
}
- if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE))
- return 0;
+ if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) {
+ if (!prange->mapped_to_gpu)
+ return 0;
+
+ bitmap_copy(ctx.bitmap, prange->bitmap_access,
MAX_GPU_INSTANCE);
+ }
if (prange->actual_loc && !prange->ttm_res) {
/* This should never happen. actual_loc gets set by
@@ -1595,7 +1604,7 @@ static int svm_range_validate_and_map(struct
mm_struct *mm,
}
r = svm_range_map_to_gpus(prange, offset, npages, readonly,
- ctx.bitmap, wait);
+ ctx.bitmap, wait, flush_tlb);
unlock_out:
svm_range_unlock(prange);
@@ -1691,7 +1700,7 @@ static void svm_range_restore_work(struct
work_struct *work)
mutex_lock(&prange->migrate_mutex);
r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
- false, true);
+ false, true, false);
if (r)
pr_debug("failed %d to map 0x%lx to gpus\n", r,
prange->start);
@@ -2847,7 +2856,7 @@ svm_range_restore_pages(struct amdgpu_device
*adev, unsigned int pasid,
}
}
- r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
+ r = svm_range_validate_and_map(mm, prange, gpuidx, false, false,
false);
if (r)
pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
r, svms, prange->start, prange->last);
@@ -3264,6 +3273,8 @@ svm_range_set_attr(struct kfd_process *p,
struct mm_struct *mm,
struct svm_range_list *svms;
struct svm_range *prange;
struct svm_range *next;
+ bool update_mapping = false;
+ bool flush_tlb;
int r = 0;
pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
@@ -3302,7 +3313,7 @@ svm_range_set_attr(struct kfd_process *p,
struct mm_struct *mm,
svm_range_add_notifier_locked(mm, prange);
}
list_for_each_entry(prange, &update_list, update_list) {
- svm_range_apply_attrs(p, prange, nattr, attrs);
+ svm_range_apply_attrs(p, prange, nattr, attrs,
&update_mapping);
/* TODO: unmap ranges from GPU that lost access */
}
list_for_each_entry_safe(prange, next, &remove_list,
update_list) {
@@ -3335,8 +3346,15 @@ svm_range_set_attr(struct kfd_process *p,
struct mm_struct *mm,
continue;
}
+ if (!migrated && !update_mapping) {
+ mutex_unlock(&prange->migrate_mutex);
+ continue;
+ }
+
+ flush_tlb = !migrated && update_mapping &&
prange->mapped_to_gpu;
+