In order to fully serialize access to the fenced region and the update
to the fence register we need to take extreme measures on SNB+, and
write the fence from each cpu taking care to serialise memory accesses
on each. The usual mb(), or even a mb() on each CPU is not enough to
ensure that access to the fenced region is coherent across the change
in fence register.

v2: Bring a bigger gun

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=62191
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
CC: Jon Bloomfield <jon.bloomfield at intel.com>
Cc: stable at vger.kernel.org
---
 drivers/gpu/drm/i915/i915_gem.c |   42 +++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0e207e6..a92d431 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -32,6 +32,7 @@
 #include "intel_drv.h"
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
+#include <linux/stop_machine.h>
 #include <linux/swap.h>
 #include <linux/pci.h>
 #include <linux/dma-buf.h>
@@ -2678,17 +2679,50 @@ static inline int fence_number(struct drm_i915_private *dev_priv,
 	return fence - dev_priv->fence_regs;
 }
 
+struct write_fence {
+	struct drm_device *dev;
+	struct drm_i915_gem_object *obj;
+	int fence;
+};
+
+static int i915_gem_write_fence__ipi(void *data)
+{
+	struct write_fence *args = data;
+	i915_gem_write_fence(args->dev, args->fence, args->obj);
+	return 0;
+}
+
 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
 					 struct drm_i915_fence_reg *fence,
 					 bool enable)
 {
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-	int reg = fence_number(dev_priv, fence);
-
-	i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
+	struct write_fence args = {
+		.dev = obj->base.dev,
+		.fence = fence_number(dev_priv, fence),
+		.obj = enable ? obj : NULL,
+	};
+
+	/* In order to fully serialize access to the fenced region and
+	 * the update to the fence register we need to take extreme
+	 * measures on SNB+, and write the fence from each cpu taking
+	 * care to serialise memory accesses on each. The usual mb(),
+	 * or even a mb() on each CPU is not enough to ensure that access
+	 * to the fenced region is coherent across the change in fence
+	 * register.
+	 *
+	 * As it turns out for IVB, I need a bigger gun.
+	 */
+	if (HAS_LLC(obj->base.dev)) {
+		if (INTEL_INFO(obj->base.dev)->gen >= 7)
+			stop_machine(i915_gem_write_fence__ipi, &args, cpu_possible_mask);
+		else
+			on_each_cpu((void (*)(void *))i915_gem_write_fence__ipi, &args, 1);
+	} else
+		i915_gem_write_fence__ipi(&args);
 
 	if (enable) {
-		obj->fence_reg = reg;
+		obj->fence_reg = args.fence;
 		fence->obj = obj;
 		list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
 	} else {
-- 
1.7.10.4
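
For readers unfamiliar with the two mechanisms the patch chooses between, here is a minimal, self-contained sketch (not part of the patch; the module and function names are invented for illustration). on_each_cpu() sends an IPI and runs a void-returning callback on every online CPU, while stop_machine() quiesces the whole machine and runs an int-returning callback on the CPUs in the supplied mask; the differing callback signatures are why the patch needs the (void (*)(void *)) cast when handing i915_gem_write_fence__ipi to on_each_cpu().

/*
 * Minimal standalone sketch, not part of the patch above.
 * "xfence_demo" and the callback names are made up for illustration.
 */
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/stop_machine.h>

/* stop_machine() expects an int (*)(void *) callback... */
static int run_on_cpu_stopped(void *data)
{
	/* runs while every CPU in the mask is parked in stop_machine context */
	pr_info("stop_machine callback on cpu %d\n", smp_processor_id());
	return 0;
}

/* ...whereas on_each_cpu() expects a void (*)(void *) callback, hence
 * the cast in the patch when one function has to serve both paths. */
static void run_on_cpu_ipi(void *data)
{
	pr_info("IPI callback on cpu %d\n", smp_processor_id());
}

static int __init xfence_demo_init(void)
{
	int dummy = 0;

	/* IPI every online CPU and wait for completion; the callback
	 * runs in interrupt context with IRQs disabled. */
	on_each_cpu(run_on_cpu_ipi, &dummy, 1);

	/* The bigger gun: park every CPU in a stopper thread and run
	 * the callback with the whole machine quiesced (the route the
	 * patch takes for gen7+). */
	return stop_machine(run_on_cpu_stopped, &dummy, cpu_possible_mask);
}

static void __exit xfence_demo_exit(void)
{
}

module_init(xfence_demo_init);
module_exit(xfence_demo_exit);
MODULE_LICENSE("GPL");

stop_machine() is far heavier than an IPI broadcast, since it halts every CPU until the callback completes, which is why the patch reserves it for the gen7 (IVB) case where, per the commit message, the IPI alone was not enough.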