Capture the GPU state on a GPU hang and store it for later playback using the 'crash' node in the debugfs directory. Only one crash state is stored at a time on the assumption that the first hang is usually the most interesting. The existing crash state can be cleared by writing to the debugfs node and then a new one will be captured on the next hang. Signed-off-by: Jordan Crouse <jcrouse@xxxxxxxxxxxxxx> --- drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 1 + drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 1 + drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 1 + drivers/gpu/drm/msm/adreno/adreno_gpu.c | 16 +++++++-- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 +- drivers/gpu/drm/msm/msm_debugfs.c | 57 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/msm/msm_gpu.h | 44 ++++++++++++++++++++++++- 7 files changed, 117 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 8a7d56ec..be65b4e 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -320,6 +320,7 @@ static void a3xx_recover(struct msm_gpu *gpu) gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0); adreno_recover(gpu); + msm_gpu_crashstate_set(gpu, state); gpu->funcs->gpu_state_put(state); } diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index e64c7fc..943e13f 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -319,6 +319,7 @@ static void a4xx_recover(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0); adreno_recover(gpu); + msm_gpu_crashstate_set(gpu, state); gpu->funcs->gpu_state_put(state); } diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 6747b7b..1e32c2e 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -774,6 +774,7 @@ static void a5xx_recover(struct msm_gpu *gpu) gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0); 
adreno_recover(gpu); + msm_gpu_crashstate_set(gpu, state); gpu->funcs->gpu_state_put(state); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index ba1b912..e1785c2 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -372,6 +372,8 @@ struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu *gpu) if (!state) return ERR_PTR(-ENOMEM); + kref_init(&state->ref); + do_gettimeofday(&state->time); for (i = 0; i < gpu->nr_rings; i++) { @@ -407,15 +409,23 @@ struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu *gpu) return state; } -void adreno_gpu_state_put(struct msm_gpu_state *state) +static void adreno_gpu_state_destroy(struct kref *kref) { - if (IS_ERR_OR_NULL(state)) - return; + struct msm_gpu_state *state = container_of(kref, + struct msm_gpu_state, ref); kfree(state->registers); kfree(state); } +int adreno_gpu_state_put(struct msm_gpu_state *state) +{ + if (IS_ERR_OR_NULL(state)) + return 1; + + return kref_put(&state->ref, adreno_gpu_state_destroy); +} + void adreno_show_info(struct msm_gpu *gpu, struct msm_gpu_state *state, struct drm_printer *p) { diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 4542b6b..e304a3e 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -230,7 +230,7 @@ void adreno_show_regs(struct msm_gpu *gpu, struct msm_gpu_state *state, struct drm_printer *p); struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu *gpu); -void adreno_gpu_state_put(struct msm_gpu_state *state); +int adreno_gpu_state_put(struct msm_gpu_state *state); /* ringbuffer helpers (the parts that are adreno specific) */ diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index 89ee74b..1bde88d 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -16,11 +16,65 @@ */ #ifdef CONFIG_DEBUG_FS + +#include 
<generated/utsrelease.h> +#include <linux/debugfs.h> #include "msm_drv.h" #include "msm_gpu.h" #include "msm_kms.h" #include "msm_debugfs.h" +static int msm_gpu_crash_show(struct seq_file *m, void *data) +{ + struct msm_gpu *gpu = m->private; + struct msm_gpu_state *state; + + state = msm_gpu_crashstate_get(gpu); + if (!state) + return 0; + + seq_printf(m, "%s Crash Status:\n", gpu->name); + seq_puts(m, "Kernel: " UTS_RELEASE "\n"); + seq_printf(m, "Time: %ld s %ld us\n", + state->time.tv_sec, state->time.tv_usec); + + gpu->funcs->show(gpu, state, m); + + msm_gpu_crashstate_put(gpu); + + return 0; +} + +static ssize_t msm_gpu_crash_write(struct file *file, const char __user *buf, + size_t count, loff_t *pos) +{ + struct msm_gpu *gpu = ((struct seq_file *)file->private_data)->private; + + dev_err(gpu->dev->dev, "Releasing the GPU crash state\n"); + msm_gpu_crashstate_put(gpu); + + return count; +} + +static int msm_gpu_crash_open(struct inode *inode, struct file *file) +{ + struct msm_drm_private *priv = inode->i_private; + + if (!priv->gpu) + return -ENODEV; + + return single_open(file, msm_gpu_crash_show, priv->gpu); +} + +static const struct file_operations msm_gpu_crash_fops = { + .owner = THIS_MODULE, + .open = msm_gpu_crash_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = msm_gpu_crash_write, +}; + static int msm_gpu_show(struct drm_device *dev, struct seq_file *m) { struct msm_drm_private *priv = dev->dev_private; @@ -170,6 +224,9 @@ int msm_debugfs_init(struct drm_minor *minor) return ret; } + debugfs_create_file("crash", 0644, minor->debugfs_root, + priv, &msm_gpu_crash_fops); + if (priv->kms->funcs->debugfs_init) ret = priv->kms->funcs->debugfs_init(priv->kms, minor); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index cff52ca..7ce2cba 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -69,7 +69,7 @@ struct msm_gpu_funcs { struct seq_file *m); #endif struct 
msm_gpu_state *(*gpu_state_get)(struct msm_gpu *gpu); - void (*gpu_state_put)(struct msm_gpu_state *state); + int (*gpu_state_put)(struct msm_gpu_state *state); }; struct msm_gpu { @@ -129,6 +129,8 @@ struct msm_gpu { struct work_struct recover_work; struct drm_gem_object *memptrs_bo; + + struct msm_gpu_state *crashstate; }; /* It turns out that all targets use the same ringbuffer size */ @@ -176,6 +178,7 @@ struct msm_gpu_submitqueue { }; struct msm_gpu_state { + struct kref ref; struct timeval time; struct { @@ -270,4 +273,43 @@ static inline void msm_submitqueue_put(struct msm_gpu_submitqueue *queue) kref_put(&queue->ref, msm_submitqueue_destroy); } +static inline void msm_gpu_crashstate_set(struct msm_gpu *gpu, + struct msm_gpu_state *state) +{ + /* FIXME: verify that gpu->dev->struct_mutex is held by the caller? */ + + if (!IS_ERR_OR_NULL(state) && !gpu->crashstate) { + kref_get(&state->ref); + gpu->crashstate = state; + } +} + +static inline struct msm_gpu_state *msm_gpu_crashstate_get(struct msm_gpu *gpu) +{ + struct msm_gpu_state *state = NULL; + + mutex_lock(&gpu->dev->struct_mutex); + + if (gpu->crashstate) { + kref_get(&gpu->crashstate->ref); + state = gpu->crashstate; + } + + mutex_unlock(&gpu->dev->struct_mutex); + + return state; +} + +static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu) +{ + mutex_lock(&gpu->dev->struct_mutex); + + if (gpu->crashstate) { + if (gpu->funcs->gpu_state_put(gpu->crashstate)) + gpu->crashstate = NULL; + } + + mutex_unlock(&gpu->dev->struct_mutex); +} + #endif /* __MSM_GPU_H__ */ -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html