The algorithm/information was originally written by Chad, though I changed the control flow, and I think his original code had a couple of bugs, though I didn't look very hard before rewriting. That could have also been different interpretations of the spec. The excellent comments remain entirely copied from Chad's code. I've tested this on two platforms, and it seems to perform how I want. CC: Chad Versace <chad.versace at linux.intel.com> CC: Bryan Bell <bryan.j.bell at intel.com> Signed-off-by: Ben Widawsky <ben at bwidawsk.net> --- drivers/gpu/drm/i915/i915_dma.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 53 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 0e22142..377949e 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -974,7 +974,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = 1; break; case I915_PARAM_HAS_LLC: - value = HAS_LLC(dev); + value = dev_priv->llc_size; break; case I915_PARAM_HAS_ALIASING_PPGTT: value = dev_priv->mm.aliasing_ppgtt ? 1 : 0; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c8d6104..43a549d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1187,6 +1187,8 @@ typedef struct drm_i915_private { /* Old dri1 support infrastructure, beware the dragons ya fools entering * here! */ struct i915_dri1_state dri1; + + size_t llc_size; } drm_i915_private_t; /* Iterate over initialised rings */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index af61be8..a070686 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4282,6 +4282,57 @@ i915_gem_lastclose(struct drm_device *dev) DRM_ERROR("failed to idle hardware: %d\n", ret); } +/** + * Return the size, in bytes, of the CPU L3 cache size. 
If the device has no LLC, + * return 0; if the L3 cache size cannot be obtained from CPUID, then return 1. + * From "Intel Processor Identification and the CPUID Instruction > 5.15 + * Deterministic Cache Parameters (Function 04h)": + * When EAX is initialized to a value of 4, the CPUID instruction returns + * deterministic cache information in the EAX, EBX, ECX and EDX registers. + * This function requires ECX be initialized with an index which indicates + * which cache to return information about. The OS is expected to call this + * function (CPUID.4) with ECX = 0, 1, 2, until EAX[4:0] == 0, indicating no + * more caches. The order in which the caches are returned is not specified + * and may change at Intel's discretion. + * + * Equation 5-4. Calculating the Cache Size in bytes: + * = (Ways +1) * (Partitions +1) * (Line Size +1) * (Sets +1) + * = (EBX[31:22] +1) * (EBX[21:12] +1) * (EBX[11:0] +1) * (ECX + 1) + */ +static size_t get_llc_size(struct drm_device *dev) +{ + u8 cnt = 0; + unsigned int eax, ebx, ecx, edx; + + if (!HAS_LLC(dev)) + return 0; + + do { + uint32_t cache_level; + uint32_t associativity, line_partitions, line_size, sets; + + eax = 4; + ecx = cnt; + __cpuid(&eax, &ebx, &ecx, &edx); + + cache_level = (eax >> 5) & 0x7; + if (cache_level != 3) + continue; + + associativity = ((ebx >> 22) & 0x3ff) + 1; + line_partitions = ((ebx >> 12) & 0x3ff) + 1; + line_size = (ebx & 0xfff) + 1; + sets = ecx + 1; + + return associativity * line_partitions * line_size * sets; + } while (eax & 0x1f && ++cnt); + + /* Let user space know we have LLC, but we can't figure it out */ + DRM_DEBUG_DRIVER("Couldn't find LLC size. Bug?\n"); + return 1; +} + + static void init_ring_lists(struct intel_ring_buffer *ring) { @@ -4333,6 +4384,8 @@ i915_gem_load(struct drm_device *dev) else dev_priv->num_fence_regs = 8; + dev_priv->llc_size = get_llc_size(dev); + /* Initialize fence registers to zero */ INIT_LIST_HEAD(&dev_priv->mm.fence_list); i915_gem_restore_fences(dev); -- 1.8.3.2