On Fri, Jul 20, 2018 at 08:59:31AM +0100, Chris Wilson wrote: > Not all chipsets have an internal buffer delaying the visibility of > writes via the GGTT being visible by other physical paths, but we use a > very heavy workaround for all. We only need to apply that workarounds to > the chipsets we know suffer from the delay and the resulting coherency > issue. > > Similarly, the same inconsistent coherency fouls up our ABI promise that > a write into a mmap_gtt is immediately visible to others. Since the HW > has made that a lie, let userspace know when that contract is broken. > (Not that userspace would want to use mmap_gtt on those chipsets for > other performance reasons...) > > Testcase: igt/drv_selftest/live_coherency > Testcase: igt/gem_mmap_gtt/coherency > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100587 > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.c | 3 +++ > drivers/gpu/drm/i915/i915_gem.c | 5 +++++ > drivers/gpu/drm/i915/i915_pci.c | 10 ++++++++++ > drivers/gpu/drm/i915/intel_device_info.h | 1 + > include/uapi/drm/i915_drm.h | 22 ++++++++++++++++++++++ > 5 files changed, 41 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index f8cfd16be534..18a45e7a3d7c 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -441,6 +441,9 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, > case I915_PARAM_CS_TIMESTAMP_FREQUENCY: > value = 1000 * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz; > break; > + case I915_PARAM_MMAP_GTT_COHERENT: > + value = INTEL_INFO(dev_priv)->has_coherent_ggtt; > + break; > default: > DRM_DEBUG("Unknown parameter %d\n", param->param); > return -EINVAL; > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index fcc73a6ab503..8b52cb768a67 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -802,6 +802,11 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) > * that was!). > */ > > + wmb(); > + > + if (INTEL_INFO(dev_priv)->has_coherent_ggtt) > + return; > + > i915_gem_chipset_flush(dev_priv); > > intel_runtime_pm_get(dev_priv); > diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c > index 6a4d1388ad2d..e443fe44da3a 100644 > --- a/drivers/gpu/drm/i915/i915_pci.c > +++ b/drivers/gpu/drm/i915/i915_pci.c > @@ -74,6 +74,7 @@ ? These macros make me sad sometimes. Increase the context size a bit for this one? > .unfenced_needs_alignment = 1, \ > .ring_mask = RENDER_RING, \ > .has_snoop = true, \ > + .has_coherent_ggtt = false, \ > GEN_DEFAULT_PIPEOFFSETS, \ > GEN_DEFAULT_PAGE_SIZES, \ > CURSOR_OFFSETS > @@ -110,6 +111,7 @@ static const struct intel_device_info intel_i865g_info = { > .has_gmch_display = 1, \ > .ring_mask = RENDER_RING, \ > .has_snoop = true, \ > + .has_coherent_ggtt = true, \ > GEN_DEFAULT_PIPEOFFSETS, \ > GEN_DEFAULT_PAGE_SIZES, \ > CURSOR_OFFSETS > @@ -117,6 +119,7 @@ static const struct intel_device_info intel_i865g_info = { > static const struct intel_device_info intel_i915g_info = { > GEN3_FEATURES, > PLATFORM(INTEL_I915G), > + .has_coherent_ggtt = false, > .cursor_needs_physical = 1, > .has_overlay = 1, .overlay_needs_physical = 1, > .hws_needs_physical = 1, > @@ -178,6 +181,7 @@ static const struct intel_device_info intel_pineview_info = { > .has_gmch_display = 1, \ > .ring_mask = RENDER_RING, \ > .has_snoop = true, \ > + .has_coherent_ggtt = true, \ > GEN_DEFAULT_PIPEOFFSETS, \ > GEN_DEFAULT_PAGE_SIZES, \ > CURSOR_OFFSETS > @@ -220,6 +224,7 @@ static const struct intel_device_info intel_gm45_info = { > .has_hotplug = 1, \ > .ring_mask = RENDER_RING | BSD_RING, \ > .has_snoop = true, \ > + .has_coherent_ggtt = true, \ > /* ilk does support rc6, but we do not implement [power] contexts */ \ > .has_rc6 = 0, \ > GEN_DEFAULT_PIPEOFFSETS, \ > @@ -243,6 +248,7 @@ static const struct intel_device_info intel_ironlake_m_info = { > .has_hotplug = 1, \ > .has_fbc = 1, \ > .ring_mask = RENDER_RING | BSD_RING | BLT_RING, \ > + .has_coherent_ggtt = true, \ > .has_llc = 1, \ > .has_rc6 = 1, \ > .has_rc6p = 1, \ > @@ -287,6 +293,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = { > .has_hotplug = 1, \ > .has_fbc = 1, \ > .ring_mask = RENDER_RING | BSD_RING | BLT_RING, \ > + .has_coherent_ggtt = true, \ > .has_llc = 1, \ > .has_rc6 = 1, \ > .has_rc6p = 1, \ > @@ -347,6 +354,7 @@ static const struct intel_device_info intel_valleyview_info = { > .has_aliasing_ppgtt = 1, > .has_full_ppgtt = 1, > .has_snoop = true, > + .has_coherent_ggtt = false, > .ring_mask = RENDER_RING | BSD_RING | BLT_RING, > .display_mmio_offset = VLV_DISPLAY_BASE, > GEN_DEFAULT_PAGE_SIZES, > @@ -441,6 +449,7 @@ static const struct intel_device_info intel_cherryview_info = { > .has_full_ppgtt = 1, > .has_reset_engine = 1, > .has_snoop = true, > + .has_coherent_ggtt = false, > .display_mmio_offset = VLV_DISPLAY_BASE, > GEN_DEFAULT_PAGE_SIZES, > GEN_CHV_PIPEOFFSETS, > @@ -517,6 +526,7 @@ static const struct intel_device_info intel_skylake_gt4_info = { > .has_full_48bit_ppgtt = 1, \ > .has_reset_engine = 1, \ > .has_snoop = true, \ > + .has_coherent_ggtt = false, \ > .has_ipc = 1, \ > GEN9_DEFAULT_PAGE_SIZES, \ > GEN_DEFAULT_PIPEOFFSETS, \ > diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h > index 633f9fbf72ea..07e8364d1a8c 100644 > --- a/drivers/gpu/drm/i915/intel_device_info.h > +++ b/drivers/gpu/drm/i915/intel_device_info.h > @@ -106,6 +106,7 @@ enum intel_platform { > func(has_resource_streamer); \ > func(has_runtime_pm); \ > func(has_snoop); \ > + func(has_coherent_ggtt); \ > func(unfenced_needs_alignment); \ > func(cursor_needs_physical); \ > func(hws_needs_physical); \ > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 7f5634ce8e88..8ee81e6f151c 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -529,6 +529,28 @@ typedef struct drm_i915_irq_wait { > */ > #define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51 > > +/* > + * Once upon a time we supposed that writes through the GGTT would be > + * immediately in physical memory (once flushed out of the CPU path). However, > + * on a few different processors and chipsets, this is not necessarily the case > + * as the writes appear to be buffered internally. Thus a read of the backing > + * storage (physical memory) via a different path (with different physical tags > + * to the indirect write via the GGTT) will see stale values from before > + * the GGTT write. Inside the kernel, we can for the most part keep track of > + * the different read/write domains in use (e.g. set-domain), but the assumption > + * of coherency is baked into the ABI, hence reporting its true state in this > + * parameter. > + * > + * If set to true, writes via mmap_gtt are immediately visible following an > + * lfence to flush the WCB. > + * > + * If set to false, writes via mmap_gtt are indeterminatnly delayed in an in > + * internal buffer and are _not_ immediately visible to third parties accessing > + * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC > + * communications channel when set to false is strongly disadvised. > + */ > +#define I915_PARAM_MMAP_GTT_COHERENT 52 > + > typedef struct drm_i915_getparam { > __s32 param; > /* > -- > 2.18.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Ville Syrjälä Intel _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx