On 2022-09-12 14:53, Felix Kuehling wrote:
Am 2022-09-12 um 14:04 schrieb Philip Yang:
Unified memory usage with xnack off is tracked to avoid oversubscribe
system memory. When changing xnack mode from off to on, subsequent
free ranges allocated with xnack off will not unreserve memory because
xnack is changed to on, cause memory accounting unbalanced.
To you need something equivalent to reserve already allocated memory
when switching XNACK off?
One more comment inline.
Yes, I get error message "KFD system memory accounting unbalanced" when
switching XNACK off too, I will send v2 patch to handle this case.
Thanks,
Philip
Unreserve memory of the ranges allocated with xnack off when switching
to xnack on.
Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
---
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 16 ++++++++++++++--
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 14 ++++++++++++++
drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 3 +++
3 files changed, 31 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 56f7307c21d2..1855efeeaaa0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1594,16 +1594,28 @@ static int kfd_ioctl_set_xnack_mode(struct
file *filep,
if (args->xnack_enabled >= 0) {
if (!list_empty(&p->pqm.queues)) {
pr_debug("Process has user queues running\n");
- mutex_unlock(&p->mutex);
- return -EBUSY;
+ r = -EBUSY;
+ goto out_unlock;
}
+
+ if (p->xnack_enabled == args->xnack_enabled)
+ goto out_unlock;
+
if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))
r = -EPERM;
You should goto out_unlock here. It may not be strictly necessary, but
it's confusing to think of the cases where you may fall through here
unexpectedly.
Regards,
Felix
else
p->xnack_enabled = args->xnack_enabled;
+
+ /* Switching to XNACK on, unreserve memory of all ranges
+ * allocated with XNACK off.
+ */
+ if (p->xnack_enabled)
+ svm_range_list_unreserve_mem(p);
} else {
args->xnack_enabled = p->xnack_enabled;
}
+
+out_unlock:
mutex_unlock(&p->mutex);
return r;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index cf5b4005534c..5c333bacf546 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2956,6 +2956,20 @@ svm_range_restore_pages(struct amdgpu_device
*adev, unsigned int pasid,
return r;
}
+void svm_range_list_unreserve_mem(struct kfd_process *p)
+{
+ struct svm_range *prange;
+ uint64_t size;
+
+ mutex_lock(&p->svms.lock);
+ list_for_each_entry(prange, &p->svms.list, list) {
+ size = (prange->last - prange->start + 1) << PAGE_SHIFT;
+ pr_debug("svms 0x%p size 0x%llx\n", &p->svms, size);
+ amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ }
+ mutex_unlock(&p->svms.lock);
+}
+
void svm_range_list_fini(struct kfd_process *p)
{
struct svm_range *prange;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 012c53729516..339f706673c8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -203,11 +203,14 @@ void svm_range_list_lock_and_flush_work(struct
svm_range_list *svms, struct mm_s
void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);
void svm_range_set_max_pages(struct amdgpu_device *adev);
+void svm_range_list_unreserve_mem(struct kfd_process *p);
#else
struct kfd_process;
+void svm_range_list_unreserve_mem(struct kfd_process *p) { }
+
static inline int svm_range_list_init(struct kfd_process *p)
{
return 0;