+ if (ti)
+ kref_put(&ti->refcount, amdgpu_vm_free_task_info);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 204ab13184ed..40a8c532a5ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -187,6 +187,7 @@ struct amdgpu_task_info {
char task_name[TASK_COMM_LEN];
pid_t pid;
pid_t tgid;
+ struct kref refcount;
};
/**
@@ -333,9 +334,6 @@ struct amdgpu_vm {
/* Valid while the PD is reserved or fenced */
uint64_t pd_phys_addr;
- /* Some basic info about the task */
- struct amdgpu_task_info task_info;
-
/* Store positions of group of BOs */
struct ttm_lru_bulk_move lru_bulk_move;
/* Flag to indicate if VM is used for compute */
@@ -466,8 +464,9 @@ bool amdgpu_vm_need_pipeline_sync(struct
amdgpu_ring *ring,
struct amdgpu_job *job);
void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
-void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
- struct amdgpu_task_info *task_info);
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid);
+void amdgpu_vm_put_task_info(struct amdgpu_device *adev, u32 pasid);
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
u32 vmid, u32 node_id, uint64_t addr,
bool write_fault);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 96d601e209b8..3d7a9ad963a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -1023,21 +1023,24 @@ int amdgpu_vm_ptes_update(struct
amdgpu_vm_update_params *params,
uint64_t upd_end = min(entry_end, frag_end);
unsigned int nptes = (upd_end - frag_start) >> shift;
uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
+ struct amdgpu_task_info *task_info;
/* This can happen when we set higher level PDs to
* silent to stop fault floods.
*/
nptes = max(nptes, 1u);
+ task_info = amdgpu_vm_get_task_info(adev, vm->pasid);
trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
min(nptes, 32u), dst, incr,
upd_flags,
- vm->task_info.tgid,
+ task_info ? task_info->tgid : 0,
vm->immediate.fence_context);
amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
cursor.level, pe_start, dst,
nptes, incr, upd_flags);
+ amdgpu_vm_put_task_info(adev, vm->pasid);
pe_start += nptes * 8;
dst += nptes * incr;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index fa87a85e1017..14ded13c8b09 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -107,7 +107,7 @@ static int gmc_v10_0_process_interrupt(struct
amdgpu_device *adev,
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index];
bool retry_fault = !!(entry->src_data[1] & 0x80);
bool write_fault = !!(entry->src_data[1] & 0x20);
- struct amdgpu_task_info task_info;
+ struct amdgpu_task_info *task_info;
uint32_t status = 0;
u64 addr;
@@ -155,15 +155,18 @@ static int
gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
if (!printk_ratelimit())
return 0;
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
-
- dev_err(adev->dev,
- "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, for
process %s pid %d thread %s pid %d)\n",
+ dev_err(adev->dev, "[%s] page fault (src_id:%u ring:%u vmid:%u
pasid:%u)\n",
entry->vmid_src ? "mmhub" : "gfxhub",
entry->src_id, entry->ring_id, entry->vmid,
- entry->pasid, task_info.process_name, task_info.tgid,
- task_info.task_name, task_info.pid);
+ entry->pasid);
+
+ task_info = amdgpu_vm_get_task_info(adev, entry->pasid);
+ if (task_info)
+ dev_err(adev->dev, "for process %s pid %d thread %s pid
%d\n",
+ task_info->process_name, task_info->tgid,
+ task_info->task_name, task_info->pid);
+ amdgpu_vm_put_task_info(adev, entry->pasid);
+
dev_err(adev->dev, " in page starting at address 0x%016llx
from client 0x%x (%s)\n",
addr, entry->client_id,
soc15_ih_clientid_name[entry->client_id]);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index e3b76fd28d15..2d96567171bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -120,17 +120,21 @@ static int gmc_v11_0_process_interrupt(struct
amdgpu_device *adev,
}
if (printk_ratelimit()) {
- struct amdgpu_task_info task_info;
-
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+ struct amdgpu_task_info *task_info;
dev_err(adev->dev,
- "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u,
for process %s pid %d thread %s pid %d)\n",
+ "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
entry->vmid_src ? "mmhub" : "gfxhub",
entry->src_id, entry->ring_id, entry->vmid,
- entry->pasid, task_info.process_name, task_info.tgid,
- task_info.task_name, task_info.pid);
+ entry->pasid);
+
+ task_info = amdgpu_vm_get_task_info(adev, entry->pasid);
+ if (task_info)
+ dev_err(adev->dev, " for process %s pid %d thread %s
pid %d\n",
+ task_info->process_name, task_info->tgid,
+ task_info->task_name, task_info->pid);
+ amdgpu_vm_put_task_info(adev, entry->pasid);
+
dev_err(adev->dev, " in page starting at address
0x%016llx from client %d\n",
addr, entry->client_id);
if (!amdgpu_sriov_vf(adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 5af235202513..8759ef1c5ea5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1446,14 +1446,19 @@ static int
gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
gmc_v8_0_set_fault_enable_default(adev, false);
if (printk_ratelimit()) {
- struct amdgpu_task_info task_info;
+ struct amdgpu_task_info *task_info;
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+ dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
+ entry->src_id, entry->src_data[0]);
+
+ task_info = amdgpu_vm_get_task_info(adev, entry->pasid);
+ if (task_info) {
+ dev_err(adev->dev, "for process %s pid %d thread %s
pid %d\n\n",
+ task_info->process_name, task_info->tgid,
+ task_info->task_name, task_info->pid);
+ }
+ amdgpu_vm_put_task_info(adev, entry->pasid);
- dev_err(adev->dev, "GPU fault detected: %d 0x%08x for
process %s pid %d thread %s pid %d\n",
- entry->src_id, entry->src_data[0],
task_info.process_name,
- task_info.tgid, task_info.task_name, task_info.pid);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR
0x%08X\n",
addr);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS
0x%08X\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index f9a5a2c0573e..75b849d69875 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -550,7 +550,7 @@ static int gmc_v9_0_process_interrupt(struct
amdgpu_device *adev,
bool retry_fault = !!(entry->src_data[1] & 0x80);
bool write_fault = !!(entry->src_data[1] & 0x20);
uint32_t status = 0, cid = 0, rw = 0;
- struct amdgpu_task_info task_info;
+ struct amdgpu_task_info *task_info;
struct amdgpu_vmhub *hub;
const char *mmhub_cid;
const char *hub_name;
@@ -625,16 +625,19 @@ static int gmc_v9_0_process_interrupt(struct
amdgpu_device *adev,
if (!printk_ratelimit())
return 0;
-
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
-
dev_err(adev->dev,
- "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u,
for process %s pid %d thread %s pid %d)\n",
+ "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
hub_name, retry_fault ? "retry" : "no-retry",
entry->src_id, entry->ring_id, entry->vmid,
- entry->pasid, task_info.process_name, task_info.tgid,
- task_info.task_name, task_info.pid);
+ entry->pasid);
+
+ task_info = amdgpu_vm_get_task_info(adev, entry->pasid);
+ if (task_info)
+ dev_err(adev->dev, " process %s pid %d thread %s pid %d\n",
+ task_info->process_name, task_info->tgid,
+ task_info->task_name, task_info->pid);
+ amdgpu_vm_put_task_info(adev, entry->pasid);
+
dev_err(adev->dev, " in page starting at address 0x%016llx
from IH client 0x%x (%s)\n",
addr, entry->client_id,
soc15_ih_clientid_name[entry->client_id]);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index cd37f45e01a1..d3b9fe74332a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -2112,7 +2112,7 @@ static int sdma_v4_0_print_iv_entry(struct
amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
int instance;
- struct amdgpu_task_info task_info;
+ struct amdgpu_task_info *task_info;
u64 addr;
instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
@@ -2124,15 +2124,16 @@ static int sdma_v4_0_print_iv_entry(struct
amdgpu_device *adev,
addr = (u64)entry->src_data[0] << 12;
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
-
dev_dbg_ratelimited(adev->dev,
- "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u "
- "pasid:%u, for process %s pid %d thread %s pid %d\n",
- instance, addr, entry->src_id, entry->ring_id,
entry->vmid,
- entry->pasid, task_info.process_name, task_info.tgid,
- task_info.task_name, task_info.pid);
+ "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u
pasid:%u\n",
+ instance, addr, entry->src_id, entry->ring_id,
entry->vmid, entry->pasid);
+
+ task_info = amdgpu_vm_get_task_info(adev, entry->pasid);
+ if (task_info)
+ dev_dbg_ratelimited(adev->dev, "for process %s pid %d
thread %s pid %d\n",
+ task_info->process_name, task_info->tgid,
+ task_info->task_name, task_info->pid);
+ amdgpu_vm_put_task_info(adev, entry->pasid);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index f413898dda37..56c2f744d64e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -1633,7 +1633,7 @@ static int sdma_v4_4_2_print_iv_entry(struct
amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
int instance;
- struct amdgpu_task_info task_info;
+ struct amdgpu_task_info *task_info;
u64 addr;
instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
@@ -1645,15 +1645,17 @@ static int
sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev,
addr = (u64)entry->src_data[0] << 12;
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
-
dev_dbg_ratelimited(adev->dev,
- "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u "
- "pasid:%u, for process %s pid %d thread %s pid %d\n",
- instance, addr, entry->src_id, entry->ring_id,
entry->vmid,
- entry->pasid, task_info.process_name, task_info.tgid,
- task_info.task_name, task_info.pid);
+ "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u
pasid:%u\n",
+ instance, addr, entry->src_id, entry->ring_id,
entry->vmid, entry->pasid);
+
+ task_info = amdgpu_vm_get_task_info(adev, entry->pasid);
+ if (task_info)
+ dev_dbg_ratelimited(adev->dev, "for process %s pid %d
thread %s pid %d\n",
+ task_info->process_name, task_info->tgid,
+ task_info->task_name, task_info->pid);
+ amdgpu_vm_put_task_info(adev, entry->pasid);
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index d9953c2b2661..6b51262811f6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -238,16 +238,18 @@ void
kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
void kfd_smi_event_update_vmfault(struct kfd_node *dev,
uint16_t pasid)
{
- struct amdgpu_task_info task_info;
+ struct amdgpu_task_info *task_info;
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(dev->adev, pasid, &task_info);
/* Report VM faults from user applications, not retry from
kernel */
- if (!task_info.pid)
- return;
+ task_info = amdgpu_vm_get_task_info(dev->adev, pasid);
+ if (!task_info || !task_info->pid)
+ goto unref;
kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
- task_info.pid, task_info.task_name);
+ task_info->pid, task_info->task_name);
+
+unref:
+ amdgpu_vm_put_task_info(dev->adev, pasid);
}
void kfd_smi_event_page_fault_start(struct kfd_node *node,
pid_t pid,