On Wed, Oct 12, 2016 at 10:05:20AM +0100, Chris Wilson wrote: > Since the GTT provides universal access to any GPU page, we can use it > to reduce our plethora of read methods to just one. It also has the > important characteristic of being exactly what the GPU sees - if there > are incoherency problems, seeing the batch as executed (rather than as > trapped inside the cpu cache) is important. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> Meh, so much for me reading patches in order ;-) Please reorder this one before the previous one and I'll be happy too. > --- > drivers/gpu/drm/i915/i915_gem_gtt.c | 43 ++++++++---- > drivers/gpu/drm/i915/i915_gem_gtt.h | 2 + > drivers/gpu/drm/i915/i915_gpu_error.c | 120 ++++++++++++---------------------- > 3 files changed, 74 insertions(+), 91 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 0bb4232f66bc..2d846aa39ca5 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -2717,6 +2717,7 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) > */ > struct i915_ggtt *ggtt = &dev_priv->ggtt; > unsigned long hole_start, hole_end; > + struct i915_hw_ppgtt *ppgtt; > struct drm_mm_node *entry; > int ret; > > @@ -2724,6 +2725,15 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) > if (ret) > return ret; > > + /* Reserve a mappable slot for our lockless error capture */ > + ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, > + &ggtt->error_capture, > + 4096, 0, -1, > + 0, ggtt->mappable_end, > + 0, 0); > + if (ret) > + return ret; > + > /* Clear any non-preallocated blocks */ > drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { > DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", > @@ -2738,25 +2748,21 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) > true); > > if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { > - struct i915_hw_ppgtt *ppgtt; > - > ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); > - if (!ppgtt) > - return -ENOMEM; > + if (!ppgtt) { > + ret = -ENOMEM; > + goto err; > + } > > ret = __hw_ppgtt_init(ppgtt, dev_priv); > - if (ret) { > - kfree(ppgtt); > - return ret; > - } > + if (ret) > + goto err_ppgtt; > > - if (ppgtt->base.allocate_va_range) > + if (ppgtt->base.allocate_va_range) { > ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, > ppgtt->base.total); > - if (ret) { > - ppgtt->base.cleanup(&ppgtt->base); > - kfree(ppgtt); > - return ret; > + if (ret) > + goto err_ppgtt_cleanup; > } > > ppgtt->base.clear_range(&ppgtt->base, > @@ -2770,6 +2776,14 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) > } > > return 0; > + > +err_ppgtt_cleanup: > + ppgtt->base.cleanup(&ppgtt->base); > +err_ppgtt: > + kfree(ppgtt); > +err: > + drm_mm_remove_node(&ggtt->error_capture); > + return ret; > } > > /** > @@ -2788,6 +2802,9 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) > > i915_gem_cleanup_stolen(&dev_priv->drm); > > + if (drm_mm_node_allocated(&ggtt->error_capture)) > + drm_mm_remove_node(&ggtt->error_capture); > + > if (drm_mm_initialized(&ggtt->base.mm)) { > intel_vgt_deballoon(dev_priv); > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h > index ec78be2f8c77..bd93fb8f99d2 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > @@ -450,6 +450,8 @@ struct i915_ggtt { > bool do_idle_maps; > > int mtrr; > + > + struct drm_mm_node error_capture; > }; > > struct i915_hw_ppgtt { > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c > index 159d6d7e0cee..b3b2e6c1c6c6 100644 > --- a/drivers/gpu/drm/i915/i915_gpu_error.c > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c > @@ -656,7 +656,7 @@ static void i915_error_object_free(struct drm_i915_error_object *obj) > return; > > for (page = 0; page < obj->page_count; page++) > - kfree(obj->pages[page]); > + free_page((unsigned long)obj->pages[page]); > > kfree(obj); > } > @@ -693,98 +693,69 @@ static void i915_error_state_free(struct kref *error_ref) > kfree(error); > } > > +static int compress_page(void *src, struct drm_i915_error_object *dst) Jumping ahead a bit with the name, but imo ok ... > +{ > + unsigned long page; > + > + page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); > + if (!page) > + return -ENOMEM; > + > + dst->pages[dst->page_count++] = (void *)page; > + > + memcpy((void *)page, src, PAGE_SIZE); > + return 0; > +} > + > static struct drm_i915_error_object * > -i915_error_object_create(struct drm_i915_private *dev_priv, > +i915_error_object_create(struct drm_i915_private *i915, > struct i915_vma *vma) > { > - struct i915_ggtt *ggtt = &dev_priv->ggtt; > - struct drm_i915_gem_object *src; > + struct i915_ggtt *ggtt = &i915->ggtt; > + const u64 slot = ggtt->error_capture.start; > struct drm_i915_error_object *dst; > - int num_pages; > - bool use_ggtt; > - int i = 0; > - u64 reloc_offset; > + unsigned long num_pages; > + struct sgt_iter iter; > + dma_addr_t dma; > > if (!vma) > return NULL; > > - src = vma->obj; > - if (!src->pages) > - return NULL; > - > - num_pages = src->base.size >> PAGE_SHIFT; > - > - dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC); > + num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT; > + dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), > + GFP_ATOMIC | __GFP_NOWARN); > if (!dst) > return NULL; > > dst->gtt_offset = vma->node.start; > dst->gtt_size = vma->node.size; > + dst->page_count = 0; > > - reloc_offset = dst->gtt_offset; > - use_ggtt = (src->cache_level == I915_CACHE_NONE && > - (vma->flags & I915_VMA_GLOBAL_BIND) && > - reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end); > - > - /* Cannot access stolen address directly, try to use the aperture */ > - if (src->stolen) { > - use_ggtt = true; > - > - if (!(vma->flags & I915_VMA_GLOBAL_BIND)) > - goto unwind; > - > - reloc_offset = vma->node.start; > - if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end) > - goto unwind; > - } > + for_each_sgt_dma(dma, iter, vma->pages) { > + void __iomem *s; > + int ret; > > - /* Cannot access snooped pages through the aperture */ > - if (use_ggtt && src->cache_level != I915_CACHE_NONE && > - !HAS_LLC(dev_priv)) > - goto unwind; > + ggtt->base.insert_page(&ggtt->base, dma, slot, > + I915_CACHE_NONE, 0); > > - dst->page_count = num_pages; > - while (num_pages--) { > - void *d; > + s = io_mapping_map_atomic_wc(&ggtt->mappable, slot); > + ret = compress_page((void * __force)s, dst); > + io_mapping_unmap_atomic(s); > > - d = kmalloc(PAGE_SIZE, GFP_ATOMIC); > - if (d == NULL) > + if (ret) > goto unwind; > - > - if (use_ggtt) { > - void __iomem *s; > - > - /* Simply ignore tiling or any overlapping fence. > - * It's part of the error state, and this hopefully > - * captures what the GPU read. > - */ > - > - s = io_mapping_map_atomic_wc(&ggtt->mappable, > - reloc_offset); > - memcpy_fromio(d, s, PAGE_SIZE); > - io_mapping_unmap_atomic(s); > - } else { > - struct page *page; > - void *s; > - > - page = i915_gem_object_get_page(src, i); > - > - s = kmap_atomic(page); > - memcpy(d, s, PAGE_SIZE); > - kunmap_atomic(s); > - } > - > - dst->pages[i++] = d; > - reloc_offset += PAGE_SIZE; > } > - > - return dst; > + goto out; > > unwind: > - while (i--) > - kfree(dst->pages[i]); > + while (dst->page_count--) > + free_page((unsigned long)dst->pages[dst->page_count]); > kfree(dst); > - return NULL; > + dst = NULL; > + > +out: > + ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE, true); > + return dst; > } > > /* The error capture is special as tries to run underneath the normal > @@ -1445,9 +1416,6 @@ static int capture(void *data) > { > struct drm_i915_error_state *error = data; > > - /* Ensure that what we readback from memory matches what the GPU sees */ > - wbinvd(); > - > i915_capture_gen_state(error->i915, error); > i915_capture_reg_state(error->i915, error); > i915_gem_record_fences(error->i915, error); > @@ -1460,9 +1428,6 @@ static int capture(void *data) > error->overlay = intel_overlay_capture_error_state(error->i915); > error->display = intel_display_capture_error_state(error->i915); > > - /* And make sure we don't leave trash in the CPU cache */ > - wbinvd(); > - > return 0; > } > > @@ -1539,7 +1504,6 @@ void i915_error_state_get(struct drm_device *dev, > if (error_priv->error) > kref_get(&error_priv->error->ref); > spin_unlock_irq(&dev_priv->gpu_error.lock); > - > } Reviewed-by: Daniel Vetter <daniel.vetter@xxxxxxxx> -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx