Report the per-ring timeout in milliseconds and allow users to adjust the timeout dynamically. This can be useful for debugging, e.g. to more easily test whether a submission genuinely hangs or is just taking very long, and to temporarily disable GPU recovery so that shader problems can be examined in detail, including single-stepping through shader code. It feels a bit questionable to access ring->sched.timeout without any locking -- under a C++ memory model it would technically be undefined behavior. But it's not like a lot can go wrong here in practice, and it's not clear to me what locking or atomics, if any, should be used. Signed-off-by: Nicolai Hähnle <nicolai.haehnle@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 32 +++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index dc474b809604..32d223daa789 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -471,35 +471,65 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, return result; } static const struct file_operations amdgpu_debugfs_ring_fops = { .owner = THIS_MODULE, .read = amdgpu_debugfs_ring_read, .llseek = default_llseek }; +static int amdgpu_debugfs_timeout_ring_get(void *data, u64 *val) { + struct amdgpu_ring *ring = data; + + if (ring->sched.timeout == MAX_SCHEDULE_TIMEOUT) + *val = 0; + else + *val = jiffies_to_msecs(ring->sched.timeout); + + return 0; +} + +static int amdgpu_debugfs_timeout_ring_set(void *data, u64 val) { + struct amdgpu_ring *ring = data; + + if (val == 0) + ring->sched.timeout = MAX_SCHEDULE_TIMEOUT; + else + ring->sched.timeout = msecs_to_jiffies(val); + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_timeout_ring_fops, + amdgpu_debugfs_timeout_ring_get, + amdgpu_debugfs_timeout_ring_set, + "%llu\n"); + #endif void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring) { #if defined(CONFIG_DEBUG_FS) struct drm_minor *minor = adev_to_drm(adev)->primary; struct dentry *root = minor->debugfs_root; - char name[32]; + char name[40]; sprintf(name, "amdgpu_ring_%s", ring->name); debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, ring, &amdgpu_debugfs_ring_fops, ring->ring_size + 12); + sprintf(name, "amdgpu_timeout_ring_%s", ring->name); + debugfs_create_file(name, S_IFREG | S_IRUGO | S_IWUSR, root, ring, + &amdgpu_debugfs_timeout_ring_fops); #endif } /** * amdgpu_ring_test_helper - tests ring and set sched readiness status * * @ring: ring to try the recovery on * * Tests ring and set sched readiness status * -- 2.40.0