This results in loading the wrong VM object on PASID query.
To correct this, set up the lookup table to be per-partition-per-PASID
instead.
Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 12 ++++
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 7 ++-
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55 +++++++++++--------
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 11 +++-
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 5 +-
drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 5 +-
drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 5 +-
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 3 +-
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 5 +-
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++----
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 +-
drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 4 +-
drivers/gpu/drm/amd/amdkfd/kfd_events.c | 3 +-
.../gpu/drm/amd/amdkfd/kfd_int_process_v10.c | 8 +--
.../gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 8 +--
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 3 +-
18 files changed, 92 insertions(+), 65 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index c272461d70a9..28db789610e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -887,3 +887,15 @@ int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
return r;
}
+
+int amdgpu_amdkfd_node_id_to_xcc_id(struct amdgpu_device *adev, uint32_t node_id)
+{
+ if (adev->gfx.funcs->ih_node_to_logical_xcc) {
+ int xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, node_id);
+
+ if (xcc_id >= 0)
+ return xcc_id;
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 4ed49265c764..bf8bb45d8ab6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -356,6 +356,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag, int8_t xcp_id);
u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id);
+int amdgpu_amdkfd_node_id_to_xcc_id(struct amdgpu_device *adev, uint32_t node_id);
#define KFD_XCP_MEM_ID(adev, xcp_id) \
((adev)->xcp_mgr && (xcp_id) >= 0 ?\
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index c6a1783fc9ef..bf9f8802e18d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -37,7 +37,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
struct amdgpu_job *job = to_amdgpu_job(s_job);
struct amdgpu_task_info *ti;
struct amdgpu_device *adev = ring->adev;
- int idx;
+ int idx, xcp_id = !job->vm ? 0 : job->vm->xcp_id;
int r;
if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
@@ -62,7 +62,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
ring->fence_drv.sync_seq);
- ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
+ ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid, xcp_id);
if (ti) {
dev_err(adev->dev,
"Process information: process %s pid %d thread %s pid %d\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index d9fde38f6ee2..e413bf4a3e84 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1275,17 +1275,20 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
struct amdgpu_vm *vm = &fpriv->vm;
struct drm_amdgpu_info_gpuvm_fault gpuvm_fault;
unsigned long flags;
+ struct xarray *pasids;
if (!vm)
return -EINVAL;
memset(&gpuvm_fault, 0, sizeof(gpuvm_fault));
- xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+ /* Only the partition table this VM is registered in guards its fault_info */
+ pasids = &adev->vm_manager.pasids[vm->xcp_id];
+ xa_lock_irqsave(pasids, flags);
gpuvm_fault.addr = vm->fault_info.addr;
gpuvm_fault.status = vm->fault_info.status;
gpuvm_fault.vmhub = vm->fault_info.vmhub;
- xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+ xa_unlock_irqrestore(pasids, flags);
return copy_to_user(out, &gpuvm_fault,
min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index bcb729094521..f43e1c15f423 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -146,7 +146,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
return 0;
if (vm->pasid) {
- r = xa_err(xa_erase_irq(&adev->vm_manager.pasids, vm->pasid));
+ r = xa_err(xa_erase_irq(&adev->vm_manager.pasids[vm->xcp_id], vm->pasid));
if (r < 0)
return r;
@@ -154,7 +154,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
if (pasid) {
- r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm,
+ r = xa_err(xa_store_irq(&adev->vm_manager.pasids[vm->xcp_id], pasid, vm,
GFP_KERNEL));
if (r < 0)
return r;
@@ -2288,14 +2288,14 @@ static void amdgpu_vm_destroy_task_info(struct kref *kref)
}
static inline struct amdgpu_vm *
-amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid)
+amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid, u32 xcp_id)
{
struct amdgpu_vm *vm;
unsigned long flags;
- xa_lock_irqsave(&adev->vm_manager.pasids, flags);
- vm = xa_load(&adev->vm_manager.pasids, pasid);
- xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+ xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], flags);
+ vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
+ xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], flags);
return vm;
}
@@ -2343,10 +2343,10 @@ amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm)
* referenced down with amdgpu_vm_put_task_info.
*/
struct amdgpu_task_info *
-amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid)
+amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid, u32 xcp_id)
{
return amdgpu_vm_get_task_info_vm(
- amdgpu_vm_get_vm_from_pasid(adev, pasid));
+ amdgpu_vm_get_vm_from_pasid(adev, pasid, xcp_id));
}
static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm)
@@ -2481,6 +2481,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
amdgpu_bo_unreserve(vm->root.bo);
amdgpu_bo_unref(&root_bo);
+ vm->xcp_id = xcp_id < 0 ? 0 : xcp_id;
+
return 0;
error_free_root:
@@ -2695,8 +2697,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
#else
adev->vm_manager.vm_update_mode = 0;
#endif
-
- xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
+ for (i = 0; i < MAX_XCP; i++)
+ xa_init_flags(&(adev->vm_manager.pasids[i]), XA_FLAGS_LOCK_IRQ);
}
/**
@@ -2708,10 +2710,15 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
*/
void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
{
- WARN_ON(!xa_empty(&adev->vm_manager.pasids));
- xa_destroy(&adev->vm_manager.pasids);
+ int i;
+
+ for (i = 0; i < MAX_XCP; i++) {
+ WARN_ON(!xa_empty(&adev->vm_manager.pasids[i]));
+ xa_destroy(&adev->vm_manager.pasids[i]);
+ }
amdgpu_vmid_mgr_fini(adev);
+
}
/**
@@ -2778,17 +2785,18 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
unsigned long irqflags;
uint64_t value, flags;
struct amdgpu_vm *vm;
- int r;
+ int r, xcp_id;
- xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
- vm = xa_load(&adev->vm_manager.pasids, pasid);
+ xcp_id = amdgpu_amdkfd_node_id_to_xcc_id(adev, node_id) / max_t(u32, 1, adev->gfx.num_xcc_per_xcp);
+ xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], irqflags);
+ vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
if (vm) {
root = amdgpu_bo_ref(vm->root.bo);
is_compute_context = vm->is_compute_context;
} else {
root = NULL;
}
- xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
+ xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], irqflags);
if (!root)
return false;
@@ -2806,11 +2814,11 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
goto error_unref;
/* Double check that the VM still exists */
- xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
- vm = xa_load(&adev->vm_manager.pasids, pasid);
+ xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], irqflags);
+ vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
if (vm && vm->root.bo != root)
vm = NULL;
- xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
+ xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], irqflags);
if (!vm)
goto error_unlock;
@@ -2968,14 +2976,15 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
unsigned int pasid,
uint64_t addr,
uint32_t status,
- unsigned int vmhub)
+ unsigned int vmhub,
+ uint32_t xcp_id)
{
struct amdgpu_vm *vm;
unsigned long flags;
- xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+ xa_lock_irqsave(&adev->vm_manager.pasids[xcp_id], flags);
- vm = xa_load(&adev->vm_manager.pasids, pasid);
+ vm = xa_load(&adev->vm_manager.pasids[xcp_id], pasid);
/* Don't update the fault cache if status is 0. In the multiple
* fault case, subsequent faults will return a 0 status which is
* useless for userspace and replaces the useful fault status, so
@@ -3008,7 +3017,7 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
}
}
- xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+ xa_unlock_irqrestore(&adev->vm_manager.pasids[xcp_id], flags);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 046949c4b695..1499f5f731e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -35,6 +35,7 @@
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
#include "amdgpu_ids.h"
+#include "amdgpu_xcp.h"
struct drm_exec;
@@ -418,6 +419,9 @@ struct amdgpu_vm {
/* cached fault info */
struct amdgpu_vm_fault_info fault_info;
+
+ /* XCP ID */
+ int xcp_id;
};
struct amdgpu_vm_manager {
@@ -456,7 +460,7 @@ struct amdgpu_vm_manager {
/* PASID to VM mapping, will be used in interrupt context to
* look up VM of a page fault
*/
- struct xarray pasids;
+ struct xarray pasids[MAX_XCP];
/* Global registration of recent page fault information */
struct amdgpu_vm_fault_info fault_info;
};
@@ -550,7 +554,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
struct amdgpu_task_info *
-amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid);
+amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid, u32 xcp_id);
struct amdgpu_task_info *
amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm);
@@ -649,7 +653,8 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
unsigned int pasid,
uint64_t addr,
uint32_t status,
- unsigned int vmhub);
+ unsigned int vmhub,
+ uint32_t xcp_id);
void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct dma_fence **fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index f0ceab3ce5bf..24b042febf5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -151,7 +151,8 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
- entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
+ entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0),
+ 0);
}
if (!printk_ratelimit())
@@ -161,7 +162,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
"[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
entry->vmid_src ? "mmhub" : "gfxhub",
entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
- task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
if (task_info) {
dev_err(adev->dev,
" in process %s pid %d thread %s pid %d\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 2797fd84432b..3507046d33e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -122,7 +122,8 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
- entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
+ entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0),
+ 0);
}
if (printk_ratelimit()) {
@@ -132,7 +133,7 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
"[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
entry->vmid_src ? "mmhub" : "gfxhub",
entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
- task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
if (task_info) {
dev_err(adev->dev,
" in process %s pid %d thread %s pid %d)\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index 60acf676000b..9844564c6c74 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -115,7 +115,8 @@ static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev,
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
- entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
+ entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0),
+ 0);
}
if (printk_ratelimit()) {
@@ -125,7 +126,7 @@ static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev,
"[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
entry->vmid_src ? "mmhub" : "gfxhub",
entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
- task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
if (task_info) {
dev_err(adev->dev,
" in process %s pid %d thread %s pid %d)\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 994432fb57ea..2cdb0cbb7c4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -1268,7 +1268,8 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
return 0;
amdgpu_vm_update_fault_cache(adev, entry->pasid,
- ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status, AMDGPU_GFXHUB(0));
+ ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
+ AMDGPU_GFXHUB(0), 0);
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
gmc_v7_0_set_fault_enable_default(adev, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 86488c052f82..6855caeb7f74 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1437,7 +1437,8 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
return 0;
amdgpu_vm_update_fault_cache(adev, entry->pasid,
- ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status, AMDGPU_GFXHUB(0));
+ ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status,
+ AMDGPU_GFXHUB(0), 0);
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
gmc_v8_0_set_fault_enable_default(adev, false);
@@ -1448,7 +1449,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
entry->src_id, entry->src_data[0]);
- task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
if (task_info) {
dev_err(adev->dev, " for process %s pid %d thread %s pid %d\n",
task_info->process_name, task_info->tgid,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index b73136d390cc..e183e08b2c02 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -556,10 +556,12 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
unsigned int vmhub;
u64 addr;
uint32_t cam_index = 0;
- int ret, xcc_id = 0;
- uint32_t node_id;
+ int ret;
+ uint32_t node_id, xcc_id, xcp_id;
node_id = entry->node_id;
+ xcc_id = amdgpu_amdkfd_node_id_to_xcc_id(adev, node_id);
+ xcp_id = xcc_id / max_t(u32, 1, adev->gfx.num_xcc_per_xcp);
addr = (u64)entry->src_data[0] << 12;
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
@@ -572,12 +574,6 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
vmhub = AMDGPU_MMHUB1(0);
} else {
hub_name = "gfxhub0";
- if (adev->gfx.funcs->ih_node_to_logical_xcc) {
- xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev,
- node_id);
- if (xcc_id < 0)
- xcc_id = 0;
- }
vmhub = xcc_id;
}
hub = &adev->vmhub[vmhub];
@@ -631,7 +627,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
retry_fault ? "retry" : "no-retry",
entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
- task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, xcp_id);
if (task_info) {
dev_err(adev->dev,
" for process %s pid %d thread %s pid %d)\n",
@@ -675,7 +671,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
if (!amdgpu_sriov_vf(adev))
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
- amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub);
+ amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub, xcp_id);
dev_err(adev->dev,
"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 23ef4eb36b40..1ac4224bbe5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -2182,7 +2182,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev,
instance, addr, entry->src_id, entry->ring_id, entry->vmid,
entry->pasid);
- task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, 0);
if (task_info) {
dev_dbg_ratelimited(adev->dev,
" for process %s pid %d thread %s pid %d\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 57f16c09abfc..c8b5c0302ca7 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -1683,6 +1683,8 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev,
int instance;
struct amdgpu_task_info *task_info;
u64 addr;
+ uint32_t xcc_id = amdgpu_amdkfd_node_id_to_xcc_id(adev, entry->node_id);
+ uint32_t xcp_id = xcc_id / max_t(u32, 1, adev->gfx.num_xcc_per_xcp);
instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
if (instance < 0 || instance >= adev->sdma.num_instances) {
@@ -1698,7 +1700,7 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev,
instance, addr, entry->src_id, entry->ring_id, entry->vmid,
entry->pasid);
- task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid, xcp_id);
if (task_info) {
dev_dbg_ratelimited(adev->dev, " for process %s pid %d thread %s pid %d\n",
task_info->process_name, task_info->tgid,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index ea3792249209..c098fbaf0e1c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -1262,8 +1262,9 @@ void kfd_signal_reset_event(struct kfd_node *dev)
if (dev->dqm->detect_hang_count) {
struct amdgpu_task_info *ti;
+ uint32_t xcp_id = dev->xcp ? dev->xcp->id : 0;
- ti = amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid);
+ ti = amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid, xcp_id);
if (ti) {
dev_err(dev->adev->dev,
"Queues reset on process %s tid %d thread %s pid %d\n",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
index 8e0d0356e810..d7cbf9525698 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
@@ -377,12 +377,8 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
struct kfd_hsa_memory_exception_data exception_data;
/* gfxhub */
- if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
- hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev,
- node_id);
- if (hub_inst < 0)
- hub_inst = 0;
- }
+ if (!vmid_type)
+ hub_inst = amdgpu_amdkfd_node_id_to_xcc_id(dev->adev, node_id);
/* mmhub */
if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index a9c3580be8c9..4708b8c811a5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -437,12 +437,8 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
struct kfd_hsa_memory_exception_data exception_data;
/* gfxhub */
- if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
- hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev,
- node_id);
- if (hub_inst < 0)
- hub_inst = 0;
- }
+ if (!vmid_type)
+ hub_inst = amdgpu_amdkfd_node_id_to_xcc_id(dev->adev, node_id);
/* mmhub */
if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index ea6a8e43bd5b..b5f2f5b1069c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -251,8 +251,9 @@ void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
{
struct amdgpu_task_info *task_info;
+ uint32_t xcp_id = dev->xcp ? dev->xcp->id : 0;
- task_info = amdgpu_vm_get_task_info_pasid(dev->adev, pasid);
+ task_info = amdgpu_vm_get_task_info_pasid(dev->adev, pasid, xcp_id);
if (task_info) {
/* Report VM faults from user applications, not retry from kernel */
if (task_info->pid)