On Fri, Aug 05, 2016 at 10:06:04AM +0100, Chris Wilson wrote: > Our error states are quickly growing, pinning kernel memory with them. > The majority of the space is taken up by the error objects. These > compress well using zlib and without decode are mostly meaningless, so > encoding them does not hinder quickly parsing the error state for > familiarity. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Seems to also contain a wholesale rework of the capture logic using a ggtt mappable entry. Would explain why the missing clflush isn't an issue for you. Imo best if that part is reordered as the first patch (or at least before stop_machine, which requires the removal of cflush), and then the zlib on top. On the idea itself, since I have no clue: How do we uncompress these again? Patched intel_error_decode, or can zlib deal with in-line streams? -Daniel > --- > drivers/gpu/drm/i915/Kconfig | 1 + > drivers/gpu/drm/i915/i915_drv.h | 4 +- > drivers/gpu/drm/i915/i915_gem_gtt.c | 10 ++ > drivers/gpu/drm/i915/i915_gem_gtt.h | 2 + > drivers/gpu/drm/i915/i915_gpu_error.c | 271 +++++++++++++++++----------------- > 5 files changed, 148 insertions(+), 140 deletions(-) > > diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig > index 7badcee88ebf..c8ea20526aef 100644 > --- a/drivers/gpu/drm/i915/Kconfig > +++ b/drivers/gpu/drm/i915/Kconfig > @@ -5,6 +5,7 @@ config DRM_I915 > select INTEL_GTT > select INTERVAL_TREE > select STOP_MACHINE > + select ZLIB_DEFLATE > # we need shmfs for the swappable backing store, and in particular > # the shmem_readpage() which depends upon tmpfs > select SHMEM > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 370a4c9eea70..edc1e6d6be0d 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -534,6 +534,7 @@ struct drm_i915_error_state { > u32 tail; > u32 head; > u32 ctl; > + u32 mode; > u32 hws; > u32 ipeir; > u32 ipehr; > @@ -550,9 +551,10 @@ struct drm_i915_error_state { > u32 semaphore_mboxes[I915_NUM_ENGINES - 1]; > > struct drm_i915_error_object { > - int page_count; > u64 gtt_offset; > u64 gtt_size; > + int page_count; > + int unused; > u32 *pages[0]; > } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 7c9f654d515a..c45e7f456cea 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -2737,6 +2737,15 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) > if (ret) > return ret; > > + /* Reserve a mappable slot for our lockless error capture */ > + ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, > + &ggtt->gpu_error, > + 4096, 0, -1, > + 0, ggtt->mappable_end, > + 0, 0); > + if (ret) > + return ret; > + > /* Clear any non-preallocated blocks */ > drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { > DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", > @@ -2804,6 +2813,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) > if (drm_mm_initialized(&ggtt->base.mm)) { > intel_vgt_deballoon(dev_priv); > > + drm_mm_remove_node(&ggtt->gpu_error); > drm_mm_takedown(&ggtt->base.mm); > list_del(&ggtt->base.global_link); > } > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h > index c54ae2323df3..010e10c0b62b 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > @@ -428,6 +428,8 @@ struct i915_ggtt { > bool do_idle_maps; > > int mtrr; > + > + struct drm_mm_node gpu_error; > }; > > struct i915_hw_ppgtt { > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c > index 6078f47d4bc0..f036584e55e3 100644 > --- a/drivers/gpu/drm/i915/i915_gpu_error.c > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c > @@ -29,6 +29,7 @@ > > #include <generated/utsrelease.h> > #include <linux/stop_machine.h> > +#include <linux/zlib.h> > #include "i915_drv.h" > > static const char *engine_str(int engine) > @@ -237,6 +238,8 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, > err_printf(m, " HEAD: 0x%08x\n", ee->head); > err_printf(m, " TAIL: 0x%08x\n", ee->tail); > err_printf(m, " CTL: 0x%08x\n", ee->ctl); > + err_printf(m, " MODE: 0x%08x [idle? %d]\n", > + ee->mode, !!(ee->mode & MODE_IDLE)); > err_printf(m, " HWS: 0x%08x\n", ee->hws); > err_printf(m, " ACTHD: 0x%08x %08x\n", > (u32)(ee->acthd>>32), (u32)ee->acthd); > @@ -307,18 +310,46 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) > va_end(args); > } > > +static bool > +ascii85_encode(u32 in, char *out) > +{ > + int i; > + > + if (in == 0) > + return false; > + > + out[5] = '\0'; > + for (i = 5; i--; ) { > + out[i] = '!' + in % 85; > + in /= 85; > + } > + > + return true; > +} > + > static void print_error_obj(struct drm_i915_error_state_buf *m, > struct drm_i915_error_object *obj) > { > - int page, offset, elt; > + char out[6]; > + int page; > + > + err_puts(m, ":"); /* indicate compressed data */ > + for (page = 0; page < obj->page_count; page++) { > + int i, len; > + > + len = PAGE_SIZE; > + if (page == obj->page_count - 1) > + len -= obj->unused; > + len = (len + 3) / 4; > > - for (page = offset = 0; page < obj->page_count; page++) { > - for (elt = 0; elt < PAGE_SIZE/4; elt++) { > - err_printf(m, "%08x : %08x\n", offset, > - obj->pages[page][elt]); > - offset += 4; > + for (i = 0; i < len; i++) { > + if (ascii85_encode(obj->pages[page][i], out)) > + err_puts(m, out); > + else > + err_puts(m, "z"); > } > } > + err_puts(m, "\n"); > } > > int i915_error_state_to_str(struct drm_i915_error_state_buf *m, > @@ -328,8 +359,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, > struct drm_i915_private *dev_priv = to_i915(dev); > struct drm_i915_error_state *error = error_priv->error; > struct drm_i915_error_object *obj; > - int i, j, offset, elt; > int max_hangcheck_score; > + int i, j; > > if (!error) { > err_printf(m, "no error state collected\n"); > @@ -481,75 +512,33 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, > } > > if ((obj = ee->ringbuffer)) { > - err_printf(m, "%s --- ringbuffer = 0x%08x\n", > - dev_priv->engine[i].name, > - lower_32_bits(obj->gtt_offset)); > + err_printf(m, "%s --- ringbuffer = 0x%08llx\n", > + dev_priv->engine[i].name, obj->gtt_offset); > print_error_obj(m, obj); > } > > - if ((obj = ee->hws_page)) { > - u64 hws_offset = obj->gtt_offset; > - u32 *hws_page = &obj->pages[0][0]; > - > - if (i915.enable_execlists) { > - hws_offset += LRC_PPHWSP_PN * PAGE_SIZE; > - hws_page = &obj->pages[LRC_PPHWSP_PN][0]; > - } > - err_printf(m, "%s --- HW Status = 0x%08llx\n", > - dev_priv->engine[i].name, hws_offset); > - offset = 0; > - for (elt = 0; elt < PAGE_SIZE/16; elt += 4) { > - err_printf(m, "[%04x] %08x %08x %08x %08x\n", > - offset, > - hws_page[elt], > - hws_page[elt+1], > - hws_page[elt+2], > - hws_page[elt+3]); > - offset += 16; > - } > + if ((obj = ee->wa_ctx)) { > + err_printf(m, "%s --- WA Context = 0x%08llx\n", > + dev_priv->engine[i].name, obj->gtt_offset); > + print_error_obj(m, obj); > } > > - obj = ee->wa_ctx; > - if (obj) { > - u64 wa_ctx_offset = obj->gtt_offset; > - u32 *wa_ctx_page = &obj->pages[0][0]; > - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; > - u32 wa_ctx_size = (engine->wa_ctx.indirect_ctx.size + > - engine->wa_ctx.per_ctx.size); > - > - err_printf(m, "%s --- WA ctx batch buffer = 0x%08llx\n", > - dev_priv->engine[i].name, wa_ctx_offset); > - offset = 0; > - for (elt = 0; elt < wa_ctx_size; elt += 4) { > - err_printf(m, "[%04x] %08x %08x %08x %08x\n", > - offset, > - wa_ctx_page[elt + 0], > - wa_ctx_page[elt + 1], > - wa_ctx_page[elt + 2], > - wa_ctx_page[elt + 3]); > - offset += 16; > - } > + if ((obj = ee->hws_page)) { > + err_printf(m, "%s --- HW Status = 0x%08llx\n", > + dev_priv->engine[i].name, obj->gtt_offset); > + print_error_obj(m, obj); > } > > if ((obj = ee->ctx)) { > - err_printf(m, "%s --- HW Context = 0x%08x\n", > - dev_priv->engine[i].name, > - lower_32_bits(obj->gtt_offset)); > + err_printf(m, "%s --- HW Context = 0x%08llx\n", > + dev_priv->engine[i].name, obj->gtt_offset); > print_error_obj(m, obj); > } > } > > if ((obj = error->semaphore_obj)) { > - err_printf(m, "Semaphore page = 0x%08x\n", > - lower_32_bits(obj->gtt_offset)); > - for (elt = 0; elt < PAGE_SIZE/16; elt += 4) { > - err_printf(m, "[%04x] %08x %08x %08x %08x\n", > - elt * 4, > - obj->pages[0][elt], > - obj->pages[0][elt+1], > - obj->pages[0][elt+2], > - obj->pages[0][elt+3]); > - } > + err_printf(m, "Semaphore page = 0x%08llx\n", obj->gtt_offset); > + print_error_obj(m, obj); > } > > if (error->overlay) > @@ -605,7 +594,7 @@ static void i915_error_object_free(struct drm_i915_error_object *obj) > return; > > for (page = 0; page < obj->page_count; page++) > - kfree(obj->pages[page]); > + free_page((unsigned long)obj->pages[page]); > > kfree(obj); > } > @@ -641,98 +630,107 @@ static void i915_error_state_free(struct kref *error_ref) > kfree(error); > } > > +static int compress_page(struct z_stream_s *zstream, > + void *src, > + struct drm_i915_error_object *dst) > +{ > + zstream->next_in = src; > + zstream->avail_in = PAGE_SIZE; > + > + do { > + if (zstream->avail_out == 0) { > + unsigned long page; > + > + page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); > + if (!page) > + return -ENOMEM; > + > + dst->pages[dst->page_count++] = (void *)page; > + > + zstream->next_out = (void *)page; > + zstream->avail_out = PAGE_SIZE; > + } > + > + if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK) > + return -EIO; > + > +#if 0 > + if (zstream->total_out > zstream->total_in) > + return -E2BIG; > +#endif > + } while (zstream->avail_in); > + > + return 0; > +} > + > static struct drm_i915_error_object * > -i915_error_object_create(struct drm_i915_private *dev_priv, > +i915_error_object_create(struct drm_i915_private *i915, > struct i915_vma *vma) > { > - struct i915_ggtt *ggtt = &dev_priv->ggtt; > - struct drm_i915_gem_object *src; > + struct i915_ggtt *ggtt = &i915->ggtt; > + const u64 slot = ggtt->gpu_error.start; > struct drm_i915_error_object *dst; > - int num_pages; > - bool use_ggtt; > - int i = 0; > - u64 reloc_offset; > + struct z_stream_s zstream; > + unsigned long num_pages; > + struct sgt_iter iter; > + dma_addr_t dma; > > if (!vma) > return NULL; > > - src = vma->obj; > - if (!src->pages) > - return NULL; > - > - num_pages = src->base.size >> PAGE_SHIFT; > - > - dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC); > + num_pages = vma->size >> PAGE_SHIFT; > + num_pages = 10 * num_pages * sizeof(u32 *) >> 3; > + dst = kmalloc(sizeof(*dst) + num_pages, GFP_ATOMIC | __GFP_NOWARN); > if (!dst) > return NULL; > > dst->gtt_offset = vma->node.start; > - dst->gtt_size = vma->node.size; > + dst->page_count = 0; > + dst->unused = 0; > + > + memset(&zstream, 0, sizeof(zstream)); > + zstream.workspace = kmalloc(zlib_deflate_workspacesize(MAX_WBITS, > + MAX_MEM_LEVEL), > + GFP_ATOMIC | __GFP_NOWARN); > + if (!zstream.workspace || > + zlib_deflateInit(&zstream, Z_DEFAULT_COMPRESSION) != Z_OK) { > + kfree(zstream.workspace); > + kfree(dst); > + return NULL; > + } > > - reloc_offset = dst->gtt_offset; > - use_ggtt = (src->cache_level == I915_CACHE_NONE && > - (vma->flags & I915_VMA_GLOBAL_BIND) && > - reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end); > + for_each_sgt_dma(dma, iter, > + vma->ggtt_view.pages ?: vma->obj->pages) { > + int ret; > + void *s; > > - /* Cannot access stolen address directly, try to use the aperture */ > - if (src->stolen) { > - use_ggtt = true; > + ggtt->base.insert_page(&ggtt->base, dma, slot, > + I915_CACHE_NONE, 0); > > - if (!(vma->flags & I915_VMA_GLOBAL_BIND)) > - goto unwind; > + s = (void *__force) > + io_mapping_map_atomic_wc(ggtt->mappable, slot); > + ret = compress_page(&zstream, s, dst); > + io_mapping_unmap_atomic(s); > > - reloc_offset = vma->node.start; > - if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end) > + if (ret) > goto unwind; > } > + zlib_deflate(&zstream, Z_FINISH); > + dst->unused = zstream.avail_out; > +out: > + zlib_deflateEnd(&zstream); > + kfree(zstream.workspace); > > - /* Cannot access snooped pages through the aperture */ > - if (use_ggtt && src->cache_level != I915_CACHE_NONE && > - !HAS_LLC(dev_priv)) > - goto unwind; > - > - dst->page_count = num_pages; > - while (num_pages--) { > - void *d; > - > - d = kmalloc(PAGE_SIZE, GFP_ATOMIC); > - if (d == NULL) > - goto unwind; > - > - if (use_ggtt) { > - void __iomem *s; > - > - /* Simply ignore tiling or any overlapping fence. > - * It's part of the error state, and this hopefully > - * captures what the GPU read. > - */ > - > - s = io_mapping_map_atomic_wc(ggtt->mappable, > - reloc_offset); > - memcpy_fromio(d, s, PAGE_SIZE); > - io_mapping_unmap_atomic(s); > - } else { > - struct page *page; > - void *s; > - > - page = i915_gem_object_get_page(src, i); > - > - s = kmap_atomic(page); > - memcpy(d, s, PAGE_SIZE); > - kunmap_atomic(s); > - } > - > - dst->pages[i++] = d; > - reloc_offset += PAGE_SIZE; > - } > + ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE, true); > > return dst; > > unwind: > - while (i--) > - kfree(dst->pages[i]); > + while (dst->page_count--) > + free_page((unsigned long)dst->pages[dst->page_count]); > kfree(dst); > - return NULL; > + dst = NULL; > + goto out; > } > > /* The error capture is special as tries to run underneath the normal > @@ -979,6 +977,8 @@ static void error_record_engine_registers(struct drm_i915_error_state *error, > ee->head = I915_READ_HEAD(engine); > ee->tail = I915_READ_TAIL(engine); > ee->ctl = I915_READ_CTL(engine); > + if (INTEL_GEN(dev_priv) > 2) > + ee->mode = I915_READ_MODE(engine); > > if (I915_NEED_GFX_HWS(dev_priv)) { > i915_reg_t mmio; > @@ -1367,9 +1367,6 @@ static int capture(void *data) > { > struct drm_i915_error_state *error = data; > > - /* Ensure that what we readback from memory matches what the GPU sees */ > - wbinvd(); > - > i915_capture_gen_state(error->i915, error); > i915_capture_reg_state(error->i915, error); > i915_gem_record_fences(error->i915, error); > @@ -1383,9 +1380,6 @@ static int capture(void *data) > error->overlay = intel_overlay_capture_error_state(error->i915); > error->display = intel_display_capture_error_state(error->i915); > > - /* And make sure we don't leave trash in the CPU cache */ > - wbinvd(); > - > return 0; > } > > @@ -1459,7 +1453,6 @@ void i915_error_state_get(struct drm_device *dev, > if (error_priv->error) > kref_get(&error_priv->error->ref); > spin_unlock_irq(&dev_priv->gpu_error.lock); > - > } > > void i915_error_state_put(struct i915_error_state_file_priv *error_priv) > -- > 2.8.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx