Am 16.09.24 um 15:32 schrieb Pierre-Eric Pelloux-Prayer:
In debugfs gem_info/vm_info files, timeout handler and page fault reports.
This information is useful with the virtio/native-context driver: this
allows the guest applications identifier to visible in amdgpu's output.
The output in amdgpu_vm_info/amdgpu_gem_info looks like this:
pid:12255 Process:glxgears/test-set-fd-name ----------
Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@xxxxxxx>
---
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 16 +++++++++---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 25 +++++++++++++++++--
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +--
5 files changed, 40 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 6d5fd371d5ce..1712feb2c238 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1577,7 +1577,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
if (ret)
return ret;
- amdgpu_vm_set_task_info(avm);
+ amdgpu_vm_set_task_info(avm, NULL);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1e475eb01417..d32dc547cc80 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -310,7 +310,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
kvfree(chunk_array);
/* Use this opportunity to fill in task info for the vm */
- amdgpu_vm_set_task_info(vm);
+ amdgpu_vm_set_task_info(vm, p->filp);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 0e617dff8765..0c52168edbaf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -997,6 +997,10 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused)
if (r)
return r;
+ r = mutex_lock_interruptible(&file->name_lock);
+ if (r)
+ goto out;
+
list_for_each_entry(file, &dev->filelist, lhead) {
struct task_struct *task;
struct drm_gem_object *gobj;
@@ -1012,8 +1016,13 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused)
rcu_read_lock();
pid = rcu_dereference(file->pid);
task = pid_task(pid, PIDTYPE_TGID);
- seq_printf(m, "pid %8d command %s:\n", pid_nr(pid),
- task ? task->comm : "<unknown>");
+ seq_printf(m, "pid %8d command %s", pid_nr(pid),
+ task ? task->comm : "<unknown>");
+ if (file->name) {
+ seq_putc(m, '/');
+ seq_puts(m, file->name);
+ }
+ seq_puts(m, ":\n");
rcu_read_unlock();
spin_lock(&file->table_lock);
@@ -1024,7 +1033,8 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused)
}
spin_unlock(&file->table_lock);
}
-
+ mutex_unlock(&file->name_lock);
+out:
mutex_unlock(&dev->filelist_mutex);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index e20d19ae01b2..5701d74159d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2370,7 +2370,7 @@ static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm)
*
* @vm: vm for which to set the info
*/
-void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
+void amdgpu_vm_set_task_info(struct amdgpu_vm *vm, struct drm_file *file)
{
if (!vm->task_info)
return;
@@ -2385,7 +2385,28 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
return;
vm->task_info->tgid = current->group_leader->pid;
- get_task_comm(vm->task_info->process_name, current->group_leader);
+ __get_task_comm(vm->task_info->process_name, TASK_COMM_LEN,
+ current->group_leader);
+ /* Append drm_client_name if set. */
+ if (file && file->name) {
+ mutex_lock(&file->name_lock);
+
+ /* Assert that process_name is big enough to store process_name,
+ * so: (TASK_COMM_LEN - 1) + '/' + '\0'.
+ * This way we can concat file->name without worrying about space.
+ */
+ static_assert(sizeof(vm->task_info->process_name) >= TASK_COMM_LEN + 1);
That won't work correctly.
+ if (file->name) {
+ int n;
+
+ n = strlen(vm->task_info->process_name);
+ vm->task_info->process_name[n] = '/';
+ strscpy_pad(&vm->task_info->process_name[n + 1],
+ file->name,
+ sizeof(vm->task_info->process_name) - (n + 1));
+ }
+ mutex_unlock(&file->name_lock);
+ }
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d12d66dca8e9..cabec384b4d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -232,7 +232,7 @@ struct amdgpu_vm_pte_funcs {
};
struct amdgpu_task_info {
- char process_name[TASK_COMM_LEN];
+ char process_name[NAME_MAX];
You would need here TASK_COMM_LEN + NAME_MAX + 1.
But I'm really wondering if we shouldn't rework amdgpu_task_info to not
be based on fixed strings, but rather use a single dynamically allocated
string.
Potentially even at the end of the structure after pid, tgid etc...
Regards,
Christian.
char task_name[TASK_COMM_LEN];
pid_t pid;
pid_t tgid;
@@ -561,7 +561,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
bool write_fault);
-void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
+void amdgpu_vm_set_task_info(struct amdgpu_vm *vm, struct drm_file *file);
void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm *vm);