Certain HSW SKUs have a second bank of L3. This L3 remapping has a separate register set, and interrupt from the first "slice". A slice is simply a term to define some subset of the GPU's l3 cache. This patch implements both the interrupt handler, and ability to communicate with userspace about this second slice. Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 9 ++-- drivers/gpu/drm/i915/i915_gem.c | 26 ++++++---- drivers/gpu/drm/i915/i915_irq.c | 84 +++++++++++++++++++++------------ drivers/gpu/drm/i915/i915_reg.h | 6 +++ drivers/gpu/drm/i915/i915_sysfs.c | 34 ++++++++++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +-- include/uapi/drm/i915_drm.h | 8 ++-- 7 files changed, 115 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 81ba5bb..eb90461 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -918,9 +918,11 @@ struct i915_ums_state { int mm_suspended; }; +#define MAX_L3_SLICES 2 struct intel_l3_parity { - u32 *remap_info; + u32 *remap_info[MAX_L3_SLICES]; struct work_struct error_work; + int which_slice; }; struct i915_gem_mm { @@ -1686,7 +1688,8 @@ struct drm_i915_file_private { #define HAS_FORCE_WAKE(dev) (INTEL_INFO(dev)->has_force_wake) -#define HAS_L3_GPU_CACHE(dev) (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) +#define HAS_L3_GPU_CACHE(dev) (INTEL_INFO(dev)->gen >= 7) +#define NUM_L3_SLICES(dev) (IS_HSW_GT3(dev) ? 2 : HAS_L3_GPU_CACHE(dev)) #define GT_FREQUENCY_MULTIPLIER 50 @@ -1947,7 +1950,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); int __must_check i915_gem_init(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); -void i915_gem_l3_remap(struct drm_device *dev); +void i915_gem_l3_remap(struct drm_device *dev, int slice); void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_ringbuffer(struct drm_device *dev); int __must_check i915_gpu_idle(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5b510a3..b11f7d6c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4256,16 +4256,21 @@ i915_gem_idle(struct drm_device *dev) return 0; } -void i915_gem_l3_remap(struct drm_device *dev) +void i915_gem_l3_remap(struct drm_device *dev, int slice) { drm_i915_private_t *dev_priv = dev->dev_private; + u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); + u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; u32 misccpctl; int i; if (!HAS_L3_GPU_CACHE(dev)) return; - if (!dev_priv->l3_parity.remap_info) + if (NUM_L3_SLICES(dev) < 2 && slice) + return; + + if (!remap_info) return; misccpctl = I915_READ(GEN7_MISCCPCTL); @@ -4273,17 +4278,17 @@ void i915_gem_l3_remap(struct drm_device *dev) POSTING_READ(GEN7_MISCCPCTL); for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { - u32 remap = I915_READ(GEN7_L3LOG_BASE + i); - if (remap && remap != dev_priv->l3_parity.remap_info[i/4]) + u32 remap = I915_READ(reg_base + i); + if (remap && remap != remap_info[i/4]) DRM_DEBUG("0x%x was already programmed to %x\n", - GEN7_L3LOG_BASE + i, remap); - if (remap && !dev_priv->l3_parity.remap_info[i/4]) + reg_base + i, remap); + if (remap && !remap_info[i/4]) DRM_DEBUG_DRIVER("Clearing remapped register\n"); - I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]); + I915_WRITE(reg_base + i, remap_info[i/4]); } /* Make sure all the writes land before disabling dop clock gating */ - POSTING_READ(GEN7_L3LOG_BASE); + POSTING_READ(reg_base); I915_WRITE(GEN7_MISCCPCTL, misccpctl); } @@ -4377,7 +4382,7 @@ int i915_gem_init_hw(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; - int ret; + int ret, i; if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) return -EIO; @@ -4396,7 +4401,8 @@ i915_gem_init_hw(struct drm_device *dev) I915_WRITE(GEN7_MSG_CTL, temp); } - i915_gem_l3_remap(dev); + for (i = 0; i < NUM_L3_SLICES(dev); i++) + i915_gem_l3_remap(dev, i); i915_gem_init_swizzling(dev); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 13d26cf..62cdf05 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -882,9 +882,10 @@ static void ivybridge_parity_work(struct work_struct *work) drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t, l3_parity.error_work); u32 error_status, row, bank, subbank; - char *parity_event[5]; + char *parity_event[6]; uint32_t misccpctl; unsigned long flags; + uint8_t slice = 0; /* We must turn off DOP level clock gating to access the L3 registers. * In order to prevent a get/put style interface, acquire struct mutex @@ -892,45 +893,63 @@ static void ivybridge_parity_work(struct work_struct *work) */ mutex_lock(&dev_priv->dev->struct_mutex); + /* If we've screwed up tracking, just let the interrupt fire again */ + if (WARN_ON(!dev_priv->l3_parity.which_slice)) + goto out; + misccpctl = I915_READ(GEN7_MISCCPCTL); I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); POSTING_READ(GEN7_MISCCPCTL); - error_status = I915_READ(GEN7_L3CDERRST1); - row = GEN7_PARITY_ERROR_ROW(error_status); - bank = GEN7_PARITY_ERROR_BANK(error_status); - subbank = GEN7_PARITY_ERROR_SUBBANK(error_status); + while ((slice = ffs(dev_priv->l3_parity.which_slice)) != 0) { + u32 reg; - I915_WRITE(GEN7_L3CDERRST1, GEN7_PARITY_ERROR_VALID | - GEN7_L3CDERRST1_ENABLE); - POSTING_READ(GEN7_L3CDERRST1); + if (WARN_ON(slice >= MAX_L3_SLICES)) + break; - I915_WRITE(GEN7_MISCCPCTL, misccpctl); + dev_priv->l3_parity.which_slice &= ~(1<<slice); - spin_lock_irqsave(&dev_priv->irq_lock, flags); - ilk_enable_gt_irq(dev_priv, GT_RENDER_L3_PARITY_ERROR_INTERRUPT); - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + reg = GEN7_L3CDERRST1 + (slice * 0x200); - mutex_unlock(&dev_priv->dev->struct_mutex); + error_status = I915_READ(reg); + row = GEN7_PARITY_ERROR_ROW(error_status); + bank = GEN7_PARITY_ERROR_BANK(error_status); + subbank = GEN7_PARITY_ERROR_SUBBANK(error_status); + + I915_WRITE(reg, GEN7_PARITY_ERROR_VALID | GEN7_L3CDERRST1_ENABLE); + POSTING_READ(reg); + + parity_event[0] = I915_L3_PARITY_UEVENT "=1"; + parity_event[1] = kasprintf(GFP_KERNEL, "ROW=%d", row); + parity_event[2] = kasprintf(GFP_KERNEL, "BANK=%d", bank); + parity_event[3] = kasprintf(GFP_KERNEL, "SUBBANK=%d", subbank); + parity_event[4] = kasprintf(GFP_KERNEL, "SLICE=%d", slice); + parity_event[5] = NULL; - parity_event[0] = I915_L3_PARITY_UEVENT "=1"; - parity_event[1] = kasprintf(GFP_KERNEL, "ROW=%d", row); - parity_event[2] = kasprintf(GFP_KERNEL, "BANK=%d", bank); - parity_event[3] = kasprintf(GFP_KERNEL, "SUBBANK=%d", subbank); - parity_event[4] = NULL; + kobject_uevent_env(&dev_priv->dev->primary->kdev.kobj, + KOBJ_CHANGE, parity_event); - kobject_uevent_env(&dev_priv->dev->primary->kdev.kobj, - KOBJ_CHANGE, parity_event); + DRM_DEBUG("Parity error: Slice = %d, Row = %d, Bank = %d, Sub bank = %d.\n", + slice, row, bank, subbank); - DRM_DEBUG("Parity error: Row = %d, Bank = %d, Sub bank = %d.\n", - row, bank, subbank); + kfree(parity_event[4]); + kfree(parity_event[3]); + kfree(parity_event[2]); + kfree(parity_event[1]); + } + + I915_WRITE(GEN7_MISCCPCTL, misccpctl); + +out: + WARN_ON(dev_priv->l3_parity.which_slice); + spin_lock_irqsave(&dev_priv->irq_lock, flags); + ilk_enable_gt_irq(dev_priv, GT_PARITY_ERROR); + spin_unlock_irqrestore(&dev_priv->irq_lock, flags); - kfree(parity_event[3]); - kfree(parity_event[2]); - kfree(parity_event[1]); + mutex_unlock(&dev_priv->dev->struct_mutex); } -static void ivybridge_parity_error_irq_handler(struct drm_device *dev) +static void ivybridge_parity_error_irq_handler(struct drm_device *dev, u32 iir) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; @@ -938,9 +957,12 @@ static void ivybridge_parity_error_irq_handler(struct drm_device *dev) return; spin_lock(&dev_priv->irq_lock); - ilk_disable_gt_irq(dev_priv, GT_RENDER_L3_PARITY_ERROR_INTERRUPT); + ilk_disable_gt_irq(dev_priv, GT_PARITY_ERROR); spin_unlock(&dev_priv->irq_lock); + iir &= GT_PARITY_ERROR; + dev_priv->l3_parity.which_slice = + 1 << (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 ? 1 : 0); queue_work(dev_priv->wq, &dev_priv->l3_parity.error_work); } @@ -975,8 +997,8 @@ static void snb_gt_irq_handler(struct drm_device *dev, i915_handle_error(dev, false); } - if (gt_iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT) - ivybridge_parity_error_irq_handler(dev); + if (gt_iir & GT_PARITY_ERROR) + ivybridge_parity_error_irq_handler(dev, gt_iir); } #define HPD_STORM_DETECT_PERIOD 1000 @@ -2261,8 +2283,8 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev) dev_priv->gt_irq_mask = ~0; if (HAS_L3_GPU_CACHE(dev)) { /* L3 parity interrupt is always unmasked. */ - dev_priv->gt_irq_mask = ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT; - gt_irqs |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT; + dev_priv->gt_irq_mask = ~GT_PARITY_ERROR; + gt_irqs |= GT_PARITY_ERROR; } gt_irqs |= GT_RENDER_USER_INTERRUPT; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index bcee89b..4155a1d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -927,6 +927,7 @@ #define GT_BLT_USER_INTERRUPT (1 << 22) #define GT_BSD_CS_ERROR_INTERRUPT (1 << 15) #define GT_BSD_USER_INTERRUPT (1 << 12) +#define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 (1 << 11) /* hsw+; rsvd on snb, ivb, vlv */ #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT (1 << 5) /* !snb */ #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT (1 << 4) #define GT_RENDER_CS_MASTER_ERROR_INTERRUPT (1 << 3) @@ -937,6 +938,9 @@ #define PM_VEBOX_CS_ERROR_INTERRUPT (1 << 12) /* hsw+ */ #define PM_VEBOX_USER_INTERRUPT (1 << 10) /* hsw+ */ +#define GT_PARITY_ERROR (GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 | \ + GT_RENDER_L3_PARITY_ERROR_INTERRUPT) + /* These are all the "old" interrupts */ #define ILK_BSD_USER_INTERRUPT (1<<5) #define I915_PIPE_CONTROL_NOTIFY_INTERRUPT (1<<18) @@ -4742,6 +4746,7 @@ /* IVYBRIDGE DPF */ #define GEN7_L3CDERRST1 0xB008 /* L3CD Error Status 1 */ +#define HSW_L3CDERRST11 0xB208 /* L3CD Error Status register 1 slice 1 */ #define GEN7_L3CDERRST1_ROW_MASK (0x7ff<<14) #define GEN7_PARITY_ERROR_VALID (1<<13) #define GEN7_L3CDERRST1_BANK_MASK (3<<11) @@ -4755,6 +4760,7 @@ #define GEN7_L3CDERRST1_ENABLE (1<<7) #define GEN7_L3LOG_BASE 0xB070 +#define HSW_L3LOG_BASE_SLICE1 0xB270 #define GEN7_L3LOG_SIZE 0x80 #define GEN7_HALF_SLICE_CHICKEN1 0xe100 /* IVB GT1 + VLV */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 43c2e81..d208f2d 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -119,6 +119,7 @@ i915_l3_read(struct file *filp, struct kobject *kobj, struct drm_device *drm_dev = dminor->dev; struct drm_i915_private *dev_priv = drm_dev->dev_private; uint32_t misccpctl; + int slice = (int)(uintptr_t)attr->private; int i, ret; count = round_down(count, 4); @@ -135,11 +136,11 @@ i915_l3_read(struct file *filp, struct kobject *kobj, if (IS_HASWELL(drm_dev)) { int last = min_t(int, GEN7_L3LOG_SIZE, count + offset); - if ((!dev_priv->l3_parity.remap_info)) + if ((!dev_priv->l3_parity.remap_info[slice])) memset(buf + offset, 0, last - offset); else memcpy(buf + offset, - dev_priv->l3_parity.remap_info + (offset/4), + dev_priv->l3_parity.remap_info[slice] + (offset/4), last - offset); i = last; @@ -170,6 +171,7 @@ i915_l3_write(struct file *filp, struct kobject *kobj, struct drm_device *drm_dev = dminor->dev; struct drm_i915_private *dev_priv = drm_dev->dev_private; u32 *temp = NULL; /* Just here to make handling failures easy */ + int slice = (int)(uintptr_t)attr->private; int ret; ret = l3_access_valid(drm_dev, offset); @@ -180,7 +182,7 @@ i915_l3_write(struct file *filp, struct kobject *kobj, if (ret) return ret; - if (!dev_priv->l3_parity.remap_info) { + if (!dev_priv->l3_parity.remap_info[slice]) { temp = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); if (!temp) { mutex_unlock(&drm_dev->struct_mutex); @@ -200,11 +202,11 @@ i915_l3_write(struct file *filp, struct kobject *kobj, * at this point it is left as a TODO. */ if (temp) - dev_priv->l3_parity.remap_info = temp; + dev_priv->l3_parity.remap_info[slice] = temp; - memcpy(dev_priv->l3_parity.remap_info + (offset/4), buf, count); + memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count); - i915_gem_l3_remap(drm_dev); + i915_gem_l3_remap(drm_dev, slice); mutex_unlock(&drm_dev->struct_mutex); @@ -216,7 +218,17 @@ static struct bin_attribute dpf_attrs = { .size = GEN7_L3LOG_SIZE, .read = i915_l3_read, .write = i915_l3_write, - .mmap = NULL + .mmap = NULL, + .private = (void *)0 +}; + +static struct bin_attribute dpf_attrs_1 = { + .attr = {.name = "l3_parity_slice_1", .mode = (S_IRUSR | S_IWUSR)}, + .size = GEN7_L3LOG_SIZE, + .read = i915_l3_read, + .write = i915_l3_write, + .mmap = NULL, + .private = (void *)1 }; static ssize_t gt_cur_freq_mhz_show(struct device *kdev, @@ -527,6 +539,13 @@ void i915_setup_sysfs(struct drm_device *dev) ret = device_create_bin_file(&dev->primary->kdev, &dpf_attrs); if (ret) DRM_ERROR("l3 parity sysfs setup failed\n"); + + if (NUM_L3_SLICES(dev) > 1) { + ret = device_create_bin_file(&dev->primary->kdev, + &dpf_attrs_1); + if (ret) + DRM_ERROR("l3 parity slice 1 setup failed\n"); + } } ret = 0; @@ -550,6 +569,7 @@ void i915_teardown_sysfs(struct drm_device *dev) sysfs_remove_files(&dev->primary->kdev.kobj, vlv_attrs); else sysfs_remove_files(&dev->primary->kdev.kobj, gen6_attrs); + device_remove_bin_file(&dev->primary->kdev, &dpf_attrs_1); device_remove_bin_file(&dev->primary->kdev, &dpf_attrs); #ifdef CONFIG_PM sysfs_unmerge_group(&dev->primary->kdev.kobj, &rc6_attr_group); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 686e5b2..3539b45 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -570,7 +570,7 @@ static int init_render_ring(struct intel_ring_buffer *ring) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); if (HAS_L3_GPU_CACHE(dev)) - I915_WRITE_IMR(ring, ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT); + I915_WRITE_IMR(ring, ~GT_PARITY_ERROR); return ret; } @@ -1000,7 +1000,7 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring) if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS) I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | - GT_RENDER_L3_PARITY_ERROR_INTERRUPT)); + GT_PARITY_ERROR)); else I915_WRITE_IMR(ring, ~ring->irq_enable_mask); ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask); @@ -1021,7 +1021,7 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring) if (--ring->irq_refcount == 0) { if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS) I915_WRITE_IMR(ring, - ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT); + ~GT_PARITY_ERROR); else I915_WRITE_IMR(ring, ~0); ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 55bb572..3a4e97b 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -38,10 +38,10 @@ * * I915_L3_PARITY_UEVENT - Generated when the driver receives a parity mismatch * event from the gpu l3 cache. Additional information supplied is ROW, - * BANK, SUBBANK of the affected cacheline. Userspace should keep track of - * these events and if a specific cache-line seems to have a persistent - * error remap it with the l3 remapping tool supplied in intel-gpu-tools. - * The value supplied with the event is always 1. + * BANK, SUBBANK, SLICE of the affected cacheline. Userspace should keep + * track of these events and if a specific cache-line seems to have a + * persistent error remap it with the l3 remapping tool supplied in + * intel-gpu-tools. The value supplied with the event is always 1. * * I915_ERROR_UEVENT - Generated upon error detection, currently only via * hangcheck. The error detection event is a good indicator of when things -- 1.8.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx