Sometimes when user is trying to get error state out from debugfs after gpu hang, the memory is low and/or fragmented enough that kmalloc in seq_file will fail. Prevent big kmalloc by avoiding seq_file and instead convert error state to string in smaller chunks. v2: better alloc flags, better truncate, correct locking, and error handling improvements (Chris Wilson) Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com> --- drivers/gpu/drm/i915/i915_debugfs.c | 242 +++++++++++++++++++++++++--------- drivers/gpu/drm/i915/i915_drv.h | 15 ++- drivers/gpu/drm/i915/intel_display.c | 54 ++++---- drivers/gpu/drm/i915/intel_overlay.c | 13 +- 4 files changed, 231 insertions(+), 93 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index a55630a..3a8409a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -604,15 +604,80 @@ static const char *purgeable_flag(int purgeable) return purgeable ? " purgeable" : ""; } -static void print_error_buffers(struct seq_file *m, +static void i915_error_vprintf(struct drm_i915_error_state_buf *e, + const char *f, va_list args) +{ + unsigned len; + + if (!e->err && WARN(e->bytes > (e->size - 1), "overflow")) { + e->err = -ENOSPC; + return; + } + + if (e->bytes == e->size - 1 || e->err) + return; + + /* Seek the first printf which is hits start position */ + if (e->pos < e->start) { + len = vsnprintf(NULL, 0, f, args); + if (e->pos + len <= e->start) { + e->pos += len; + return; + } + + /* First vsnprintf needs to fit in full for memmove*/ + if (len >= e->size) { + e->err = -EIO; + return; + } + } + + len = vsnprintf(e->buf + e->bytes, e->size - e->bytes, f, args); + if (len >= e->size - e->bytes) + len = e->size - e->bytes - 1; + + /* If this is first printf in this window, adjust it so that + * start position matches start of the buffer + */ + if (e->pos < e->start) { + const size_t off = e->start - e->pos; + + /* Should not happen but be paranoid */ + if (off > len || e->bytes) { + e->err = -EIO; + return; + } + + memmove(e->buf, e->buf + off, len - off); + e->bytes = len - off; + e->pos = e->start; + return; + } + + e->bytes += len; + e->pos += len; +} + +void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) +{ + va_list args; + + va_start(args, f); + i915_error_vprintf(e, f, args); + va_end(args); +} + +#define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__) + +static void print_error_buffers(struct drm_i915_error_state_buf *m, const char *name, struct drm_i915_error_buffer *err, int count) { - seq_printf(m, "%s [%d]:\n", name, count); + err_printf(m, "%s [%d]:\n", name, count); while (count--) { - seq_printf(m, " %08x %8u %02x %02x %x %x%s%s%s%s%s%s%s", + err_printf(m, " %08x %8u %02x %02x %x %x%s%s%s%s%s%s%s", err->gtt_offset, err->size, err->read_domains, @@ -627,50 +692,50 @@ static void print_error_buffers(struct seq_file *m, cache_level_str(err->cache_level)); if (err->name) - seq_printf(m, " (name: %d)", err->name); + err_printf(m, " (name: %d)", err->name); if (err->fence_reg != I915_FENCE_REG_NONE) - seq_printf(m, " (fence: %d)", err->fence_reg); + err_printf(m, " (fence: %d)", err->fence_reg); - seq_printf(m, "\n"); + err_printf(m, "\n"); err++; } } -static void i915_ring_error_state(struct seq_file *m, +static void i915_ring_error_state(struct drm_i915_error_state_buf *m, struct drm_device *dev, struct drm_i915_error_state *error, unsigned ring) { BUG_ON(ring >= I915_NUM_RINGS); /* shut up confused gcc */ - seq_printf(m, "%s command stream:\n", ring_str(ring)); - seq_printf(m, " HEAD: 0x%08x\n", error->head[ring]); - seq_printf(m, " TAIL: 0x%08x\n", error->tail[ring]); - seq_printf(m, " CTL: 0x%08x\n", error->ctl[ring]); - seq_printf(m, " ACTHD: 0x%08x\n", error->acthd[ring]); - seq_printf(m, " IPEIR: 0x%08x\n", error->ipeir[ring]); - seq_printf(m, " IPEHR: 0x%08x\n", error->ipehr[ring]); - seq_printf(m, " INSTDONE: 0x%08x\n", error->instdone[ring]); + err_printf(m, "%s command stream:\n", ring_str(ring)); + err_printf(m, " HEAD: 0x%08x\n", error->head[ring]); + err_printf(m, " TAIL: 0x%08x\n", error->tail[ring]); + err_printf(m, " CTL: 0x%08x\n", error->ctl[ring]); + err_printf(m, " ACTHD: 0x%08x\n", error->acthd[ring]); + err_printf(m, " IPEIR: 0x%08x\n", error->ipeir[ring]); + err_printf(m, " IPEHR: 0x%08x\n", error->ipehr[ring]); + err_printf(m, " INSTDONE: 0x%08x\n", error->instdone[ring]); if (ring == RCS && INTEL_INFO(dev)->gen >= 4) - seq_printf(m, " BBADDR: 0x%08llx\n", error->bbaddr); + err_printf(m, " BBADDR: 0x%08llx\n", error->bbaddr); if (INTEL_INFO(dev)->gen >= 4) - seq_printf(m, " INSTPS: 0x%08x\n", error->instps[ring]); - seq_printf(m, " INSTPM: 0x%08x\n", error->instpm[ring]); - seq_printf(m, " FADDR: 0x%08x\n", error->faddr[ring]); + err_printf(m, " INSTPS: 0x%08x\n", error->instps[ring]); + err_printf(m, " INSTPM: 0x%08x\n", error->instpm[ring]); + err_printf(m, " FADDR: 0x%08x\n", error->faddr[ring]); if (INTEL_INFO(dev)->gen >= 6) { - seq_printf(m, " RC PSMI: 0x%08x\n", error->rc_psmi[ring]); - seq_printf(m, " FAULT_REG: 0x%08x\n", error->fault_reg[ring]); - seq_printf(m, " SYNC_0: 0x%08x [last synced 0x%08x]\n", + err_printf(m, " RC PSMI: 0x%08x\n", error->rc_psmi[ring]); + err_printf(m, " FAULT_REG: 0x%08x\n", error->fault_reg[ring]); + err_printf(m, " SYNC_0: 0x%08x [last synced 0x%08x]\n", error->semaphore_mboxes[ring][0], error->semaphore_seqno[ring][0]); - seq_printf(m, " SYNC_1: 0x%08x [last synced 0x%08x]\n", + err_printf(m, " SYNC_1: 0x%08x [last synced 0x%08x]\n", error->semaphore_mboxes[ring][1], error->semaphore_seqno[ring][1]); } - seq_printf(m, " seqno: 0x%08x\n", error->seqno[ring]); - seq_printf(m, " waiting: %s\n", yesno(error->waiting[ring])); - seq_printf(m, " ring->head: 0x%08x\n", error->cpu_ring_head[ring]); - seq_printf(m, " ring->tail: 0x%08x\n", error->cpu_ring_tail[ring]); + err_printf(m, " seqno: 0x%08x\n", error->seqno[ring]); + err_printf(m, " waiting: %s\n", yesno(error->waiting[ring])); + err_printf(m, " ring->head: 0x%08x\n", error->cpu_ring_head[ring]); + err_printf(m, " ring->tail: 0x%08x\n", error->cpu_ring_tail[ring]); } struct i915_error_state_file_priv { @@ -678,9 +743,11 @@ struct i915_error_state_file_priv { struct drm_i915_error_state *error; }; -static int i915_error_state(struct seq_file *m, void *unused) + +static int i915_error_state(struct i915_error_state_file_priv *error_priv, + struct drm_i915_error_state_buf *m) + { - struct i915_error_state_file_priv *error_priv = m->private; struct drm_device *dev = error_priv->dev; drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_error_state *error = error_priv->error; @@ -688,34 +755,35 @@ static int i915_error_state(struct seq_file *m, void *unused) int i, j, page, offset, elt; if (!error) { - seq_printf(m, "no error state collected\n"); + err_printf(m, "no error state collected\n"); return 0; } - seq_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec, + err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec, error->time.tv_usec); - seq_printf(m, "Kernel: " UTS_RELEASE "\n"); - seq_printf(m, "PCI ID: 0x%04x\n", dev->pci_device); - seq_printf(m, "EIR: 0x%08x\n", error->eir); - seq_printf(m, "IER: 0x%08x\n", error->ier); - seq_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er); - seq_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake); - seq_printf(m, "DERRMR: 0x%08x\n", error->derrmr); - seq_printf(m, "CCID: 0x%08x\n", error->ccid); + err_printf(m, "Kernel: " UTS_RELEASE "\n"); + err_printf(m, "PCI ID: 0x%04x\n", dev->pci_device); + err_printf(m, "EIR: 0x%08x\n", error->eir); + err_printf(m, "IER: 0x%08x\n", error->ier); + err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er); + err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake); + err_printf(m, "DERRMR: 0x%08x\n", error->derrmr); + err_printf(m, "CCID: 0x%08x\n", error->ccid); for (i = 0; i < dev_priv->num_fence_regs; i++) - seq_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]); + err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]); for (i = 0; i < ARRAY_SIZE(error->extra_instdone); i++) - seq_printf(m, " INSTDONE_%d: 0x%08x\n", i, error->extra_instdone[i]); + err_printf(m, " INSTDONE_%d: 0x%08x\n", i, + error->extra_instdone[i]); if (INTEL_INFO(dev)->gen >= 6) { - seq_printf(m, "ERROR: 0x%08x\n", error->error); - seq_printf(m, "DONE_REG: 0x%08x\n", error->done_reg); + err_printf(m, "ERROR: 0x%08x\n", error->error); + err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg); } if (INTEL_INFO(dev)->gen == 7) - seq_printf(m, "ERR_INT: 0x%08x\n", error->err_int); + err_printf(m, "ERR_INT: 0x%08x\n", error->err_int); for_each_ring(ring, dev_priv, i) i915_ring_error_state(m, dev, error, i); @@ -734,24 +802,25 @@ static int i915_error_state(struct seq_file *m, void *unused) struct drm_i915_error_object *obj; if ((obj = error->ring[i].batchbuffer)) { - seq_printf(m, "%s --- gtt_offset = 0x%08x\n", + err_printf(m, "%s --- gtt_offset = 0x%08x\n", dev_priv->ring[i].name, obj->gtt_offset); offset = 0; for (page = 0; page < obj->page_count; page++) { for (elt = 0; elt < PAGE_SIZE/4; elt++) { - seq_printf(m, "%08x : %08x\n", offset, obj->pages[page][elt]); + err_printf(m, "%08x : %08x\n", offset, + obj->pages[page][elt]); offset += 4; } } } if (error->ring[i].num_requests) { - seq_printf(m, "%s --- %d requests\n", + err_printf(m, "%s --- %d requests\n", dev_priv->ring[i].name, error->ring[i].num_requests); for (j = 0; j < error->ring[i].num_requests; j++) { - seq_printf(m, " seqno 0x%08x, emitted %ld, tail 0x%08x\n", + err_printf(m, " seqno 0x%08x, emitted %ld, tail 0x%08x\n", error->ring[i].requests[j].seqno, error->ring[i].requests[j].jiffies, error->ring[i].requests[j].tail); @@ -759,13 +828,13 @@ static int i915_error_state(struct seq_file *m, void *unused) } if ((obj = error->ring[i].ringbuffer)) { - seq_printf(m, "%s --- ringbuffer = 0x%08x\n", + err_printf(m, "%s --- ringbuffer = 0x%08x\n", dev_priv->ring[i].name, obj->gtt_offset); offset = 0; for (page = 0; page < obj->page_count; page++) { for (elt = 0; elt < PAGE_SIZE/4; elt++) { - seq_printf(m, "%08x : %08x\n", + err_printf(m, "%08x : %08x\n", offset, obj->pages[page][elt]); offset += 4; @@ -775,12 +844,12 @@ static int i915_error_state(struct seq_file *m, void *unused) obj = error->ring[i].ctx; if (obj) { - seq_printf(m, "%s --- HW Context = 0x%08x\n", + err_printf(m, "%s --- HW Context = 0x%08x\n", dev_priv->ring[i].name, obj->gtt_offset); offset = 0; for (elt = 0; elt < PAGE_SIZE/16; elt += 4) { - seq_printf(m, "[%04x] %08x %08x %08x %08x\n", + err_printf(m, "[%04x] %08x %08x %08x %08x\n", offset, obj->pages[0][elt], obj->pages[0][elt+1], @@ -806,8 +875,7 @@ i915_error_state_write(struct file *filp, size_t cnt, loff_t *ppos) { - struct seq_file *m = filp->private_data; - struct i915_error_state_file_priv *error_priv = m->private; + struct i915_error_state_file_priv *error_priv = filp->private_data; struct drm_device *dev = error_priv->dev; int ret; @@ -842,25 +910,81 @@ static int i915_error_state_open(struct inode *inode, struct file *file) kref_get(&error_priv->error->ref); spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags); - return single_open(file, i915_error_state, error_priv); + file->private_data = error_priv; + + return 0; } static int i915_error_state_release(struct inode *inode, struct file *file) { - struct seq_file *m = file->private_data; - struct i915_error_state_file_priv *error_priv = m->private; + struct i915_error_state_file_priv *error_priv = file->private_data; if (error_priv->error) kref_put(&error_priv->error->ref, i915_error_state_free); kfree(error_priv); - return single_release(inode, file); + return 0; +} + +static ssize_t i915_error_state_read(struct file *file, char __user *userbuf, + size_t count, loff_t *pos) +{ + struct i915_error_state_file_priv *error_priv = file->private_data; + struct drm_i915_error_state_buf error_str; + loff_t tmp_pos = 0; + ssize_t ret_count = 0; + int ret = 0; + + memset(&error_str, 0, sizeof(error_str)); + + /* We need to have enough room to store any i915_error_state printf + * so that we can move it to start position. + */ + error_str.size = count + 1 > PAGE_SIZE ? count + 1 : PAGE_SIZE; + error_str.buf = kmalloc(error_str.size, + GFP_TEMPORARY | __GFP_NORETRY | __GFP_NOWARN); + + if (error_str.buf == NULL) { + error_str.size = PAGE_SIZE; + error_str.buf = kmalloc(error_str.size, GFP_TEMPORARY); + } + + if (error_str.buf == NULL) { + error_str.size = 128; + error_str.buf = kmalloc(error_str.size, GFP_TEMPORARY); + } + + if (error_str.buf == NULL) + return -ENOMEM; + + error_str.start = *pos; + + ret = i915_error_state(error_priv, &error_str); + if (ret) + goto out; + + if (error_str.bytes == 0 && error_str.err) { + ret = error_str.err; + goto out; + } + + ret_count = simple_read_from_buffer(userbuf, count, &tmp_pos, + error_str.buf, + error_str.bytes); + + if (ret_count < 0) + ret = ret_count; + else + *pos = error_str.start + ret_count; +out: + kfree(error_str.buf); + return ret ?: ret_count; } static const struct file_operations i915_error_state_fops = { .owner = THIS_MODULE, .open = i915_error_state_open, - .read = seq_read, + .read = i915_error_state_read, .write = i915_error_state_write, .llseek = default_llseek, .release = i915_error_state_release, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1edfef2..e8e36ae 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -840,6 +840,15 @@ struct i915_gem_mm { u32 object_count; }; +struct drm_i915_error_state_buf { + unsigned bytes; + unsigned size; + int err; + u8 *buf; + loff_t start; + loff_t pos; +}; + struct i915_gpu_error { /* For hangcheck timer */ #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ @@ -1849,6 +1858,7 @@ void i915_gem_dump_object(struct drm_i915_gem_object *obj, int len, /* i915_debugfs.c */ int i915_debugfs_init(struct drm_minor *minor); void i915_debugfs_cleanup(struct drm_minor *minor); +void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); /* i915_suspend.c */ extern int i915_save_state(struct drm_device *dev); @@ -1932,10 +1942,11 @@ int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data, /* overlay */ #ifdef CONFIG_DEBUG_FS extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev); -extern void intel_overlay_print_error_state(struct seq_file *m, struct intel_overlay_error_state *error); +extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e, + struct intel_overlay_error_state *error); extern struct intel_display_error_state *intel_display_capture_error_state(struct drm_device *dev); -extern void intel_display_print_error_state(struct seq_file *m, +extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e, struct drm_device *dev, struct intel_display_error_state *error); #endif diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 684ab64..1c0d6d3 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9858,48 +9858,50 @@ intel_display_capture_error_state(struct drm_device *dev) return error; } +#define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__) + void -intel_display_print_error_state(struct seq_file *m, +intel_display_print_error_state(struct drm_i915_error_state_buf *m, struct drm_device *dev, struct intel_display_error_state *error) { int i; - seq_printf(m, "Num Pipes: %d\n", INTEL_INFO(dev)->num_pipes); + err_printf(m, "Num Pipes: %d\n", INTEL_INFO(dev)->num_pipes); if (HAS_POWER_WELL(dev)) - seq_printf(m, "PWR_WELL_CTL2: %08x\n", + err_printf(m, "PWR_WELL_CTL2: %08x\n", error->power_well_driver); for_each_pipe(i) { - seq_printf(m, "Pipe [%d]:\n", i); - seq_printf(m, " CPU transcoder: %c\n", + err_printf(m, "Pipe [%d]:\n", i); + err_printf(m, " CPU transcoder: %c\n", transcoder_name(error->pipe[i].cpu_transcoder)); - seq_printf(m, " CONF: %08x\n", error->pipe[i].conf); - seq_printf(m, " SRC: %08x\n", error->pipe[i].source); - seq_printf(m, " HTOTAL: %08x\n", error->pipe[i].htotal); - seq_printf(m, " HBLANK: %08x\n", error->pipe[i].hblank); - seq_printf(m, " HSYNC: %08x\n", error->pipe[i].hsync); - seq_printf(m, " VTOTAL: %08x\n", error->pipe[i].vtotal); - seq_printf(m, " VBLANK: %08x\n", error->pipe[i].vblank); - seq_printf(m, " VSYNC: %08x\n", error->pipe[i].vsync); - - seq_printf(m, "Plane [%d]:\n", i); - seq_printf(m, " CNTR: %08x\n", error->plane[i].control); - seq_printf(m, " STRIDE: %08x\n", error->plane[i].stride); + err_printf(m, " CONF: %08x\n", error->pipe[i].conf); + err_printf(m, " SRC: %08x\n", error->pipe[i].source); + err_printf(m, " HTOTAL: %08x\n", error->pipe[i].htotal); + err_printf(m, " HBLANK: %08x\n", error->pipe[i].hblank); + err_printf(m, " HSYNC: %08x\n", error->pipe[i].hsync); + err_printf(m, " VTOTAL: %08x\n", error->pipe[i].vtotal); + err_printf(m, " VBLANK: %08x\n", error->pipe[i].vblank); + err_printf(m, " VSYNC: %08x\n", error->pipe[i].vsync); + + err_printf(m, "Plane [%d]:\n", i); + err_printf(m, " CNTR: %08x\n", error->plane[i].control); + err_printf(m, " STRIDE: %08x\n", error->plane[i].stride); if (INTEL_INFO(dev)->gen <= 3) { - seq_printf(m, " SIZE: %08x\n", error->plane[i].size); - seq_printf(m, " POS: %08x\n", error->plane[i].pos); + err_printf(m, " SIZE: %08x\n", error->plane[i].size); + err_printf(m, " POS: %08x\n", error->plane[i].pos); } if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev)) - seq_printf(m, " ADDR: %08x\n", error->plane[i].addr); + err_printf(m, " ADDR: %08x\n", error->plane[i].addr); if (INTEL_INFO(dev)->gen >= 4) { - seq_printf(m, " SURF: %08x\n", error->plane[i].surface); - seq_printf(m, " TILEOFF: %08x\n", error->plane[i].tile_offset); + err_printf(m, " SURF: %08x\n", error->plane[i].surface); + err_printf(m, " TILEOFF: %08x\n", error->plane[i].tile_offset); } - seq_printf(m, "Cursor [%d]:\n", i); - seq_printf(m, " CNTR: %08x\n", error->cursor[i].control); - seq_printf(m, " POS: %08x\n", error->cursor[i].position); - seq_printf(m, " BASE: %08x\n", error->cursor[i].base); + err_printf(m, "Cursor [%d]:\n", i); + err_printf(m, " CNTR: %08x\n", error->cursor[i].control); + err_printf(m, " POS: %08x\n", error->cursor[i].position); + err_printf(m, " BASE: %08x\n", error->cursor[i].base); } } #endif diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 40e509c..a369881 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -1485,14 +1485,15 @@ err: } void -intel_overlay_print_error_state(struct seq_file *m, struct intel_overlay_error_state *error) +intel_overlay_print_error_state(struct drm_i915_error_state_buf *m, + struct intel_overlay_error_state *error) { - seq_printf(m, "Overlay, status: 0x%08x, interrupt: 0x%08x\n", - error->dovsta, error->isr); - seq_printf(m, " Register file at 0x%08lx:\n", - error->base); + i915_error_printf(m, "Overlay, status: 0x%08x, interrupt: 0x%08x\n", + error->dovsta, error->isr); + i915_error_printf(m, " Register file at 0x%08lx:\n", + error->base); -#define P(x) seq_printf(m, " " #x ": 0x%08x\n", error->regs.x) +#define P(x) i915_error_printf(m, " " #x ": 0x%08x\n", error->regs.x) P(OBUF_0Y); P(OBUF_1Y); P(OBUF_0U); -- 1.7.9.5