On 17-01-04 20:42:32, Ville Syrjälä wrote:
From: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> SKL+ display engine can scan out certain kinds of compressed surfaces produced by the render engine. This involved telling the display engine the location of the color control surfae (CCS) which describes which parts of the main surface are compressed and which are not. The location of CCS is provided by userspace as just another plane with its own offset. Add the required stuff to validate the user provided AUX plane metadata and convert the user provided linear offset into something the hardware can consume. Due to hardware limitations we require that the main surface and the AUX surface (CCS) be part of the same bo. The hardware also makes life hard by not allowing you to provide separate x/y offsets for the main and AUX surfaces (excpet with NV12), so finding suitable offsets for both requires a bit of work. Assuming we still want keep playing tricks with the offsets. I've just gone with a dumb "search backward for suitable offsets" approach, which is far from optimal, but it works. Also not all planes will be capable of scanning out compressed surfaces, and eg. 90/270 degree rotation is not supported in combination with decompression either. This patch may contain work from at least the following people: * Vandana Kannan <vandana.kannan@xxxxxxxxx> * Daniel Vetter <daniel@xxxxxxxx> * Ben Widawsky <ben@xxxxxxxxxxxx> Cc: Vandana Kannan <vandana.kannan@xxxxxxxxx> Cc: Daniel Vetter <daniel@xxxxxxxx> Cc: Ben Widawsky <ben@xxxxxxxxxxxx> Cc: Jason Ekstrand <jason@xxxxxxxxxxxxxx> Signed-off-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_reg.h | 22 ++++ drivers/gpu/drm/i915/intel_display.c | 219 +++++++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_pm.c | 8 +- drivers/gpu/drm/i915/intel_sprite.c | 5 + 4 files changed, 240 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 00970aa77afa..05e18e742776 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6209,6 +6209,28 @@ enum { _ID(id, _PS_ECC_STAT_1A, _PS_ECC_STAT_2A), \ _ID(id, _PS_ECC_STAT_1B, _PS_ECC_STAT_2B)) +#define PLANE_AUX_DIST_1_A 0x701c0 +#define PLANE_AUX_DIST_2_A 0x702c0 +#define PLANE_AUX_DIST_1_B 0x711c0 +#define PLANE_AUX_DIST_2_B 0x712c0 +#define _PLANE_AUX_DIST_1(pipe) \ + _PIPE(pipe, PLANE_AUX_DIST_1_A, PLANE_AUX_DIST_1_B) +#define _PLANE_AUX_DIST_2(pipe) \ + _PIPE(pipe, PLANE_AUX_DIST_2_A, PLANE_AUX_DIST_2_B) +#define PLANE_AUX_DIST(pipe, plane) \ + _MMIO_PLANE(plane, _PLANE_AUX_DIST_1(pipe), _PLANE_AUX_DIST_2(pipe)) + +#define PLANE_AUX_OFFSET_1_A 0x701c4 +#define PLANE_AUX_OFFSET_2_A 0x702c4 +#define PLANE_AUX_OFFSET_1_B 0x711c4 +#define PLANE_AUX_OFFSET_2_B 0x712c4 +#define _PLANE_AUX_OFFSET_1(pipe) \ + _PIPE(pipe, PLANE_AUX_OFFSET_1_A, PLANE_AUX_OFFSET_1_B) +#define _PLANE_AUX_OFFSET_2(pipe) \ + _PIPE(pipe, PLANE_AUX_OFFSET_2_A, PLANE_AUX_OFFSET_2_B) +#define PLANE_AUX_OFFSET(pipe, plane) \ + _MMIO_PLANE(plane, _PLANE_AUX_OFFSET_1(pipe), _PLANE_AUX_OFFSET_2(pipe)) + /* legacy palette */ #define _LGC_PALETTE_A 0x4a000 #define _LGC_PALETTE_B 0x4a800 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 38de9df0ec60..b547332eeda1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2064,11 +2064,19 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int plane) return 128; else return 512; + case I915_FORMAT_MOD_Y_TILED_CCS: + if (plane == 1) + return 64; + /* fall through */ case I915_FORMAT_MOD_Y_TILED: if (IS_GEN2(dev_priv) || HAS_128_BYTE_Y_TILING(dev_priv)) return 128; else return 512; + case I915_FORMAT_MOD_Yf_TILED_CCS: + if (plane == 1) + return 64; + /* fall through */ case I915_FORMAT_MOD_Yf_TILED: /* * Bspec seems to suggest that the Yf tile width would @@ -2156,7 +2164,7 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb, struct drm_i915_private *dev_priv = to_i915(fb->dev); /* AUX_DIST needs only 4K alignment */ - if (fb->format->format == DRM_FORMAT_NV12 && plane == 1) + if (plane == 1)
This looks wrong at least within this context, surely multi-planar formats might have different alignment restrictions?
return 4096; switch (fb->modifier) { @@ -2166,6 +2174,8 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb, if (INTEL_GEN(dev_priv) >= 9) return 256 * 1024; return 0; + case I915_FORMAT_MOD_Y_TILED_CCS: + case I915_FORMAT_MOD_Yf_TILED_CCS: case I915_FORMAT_MOD_Y_TILED: case I915_FORMAT_MOD_Yf_TILED: return 1 * 1024 * 1024; @@ -2472,6 +2482,7 @@ static unsigned int intel_fb_modifier_to_tiling(uint64_t fb_modifier) case I915_FORMAT_MOD_X_TILED: return I915_TILING_X; case I915_FORMAT_MOD_Y_TILED: + case I915_FORMAT_MOD_Y_TILED_CCS:
Is I915_FORMAT_MOD_Yf_TILED_CCS supposed to be here?
return I915_TILING_Y; default: return I915_TILING_NONE; @@ -2536,6 +2547,35 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, intel_fb_offset_to_xy(&x, &y, fb, i); + if ((fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS) && i == 1) {
In one of my branches I turned this into a macro/inline because I ended up checking it pretty often (if modifier is CCS type and plane == 1). Just a thought.
+ int main_x, main_y; + int ccs_x, ccs_y; + + /* + * Each byte of CCS corresponds to a 16x8 area of the main surface, and + * each CCS tile is 64x64 bytes. + */ + ccs_x = (x * 16) % (64 * 16); + ccs_y = (y * 8) % (64 * 8); + main_x = intel_fb->normal[0].x % (64 * 16); + main_y = intel_fb->normal[0].y % (64 * 8); + + /* + * CCS doesn't have its own x/y offset register, so the intra CCS tile + * x/y offsets must match between CCS and the main surface. + */ + if (main_x != ccs_x || main_y != ccs_y) { + DRM_DEBUG_KMS("Bad CCS x/y (main %d,%d ccs %d,%d) full (main %d,%d ccs %d,%d)\n", + main_x, main_y, + ccs_x, ccs_y, + intel_fb->normal[0].x, + intel_fb->normal[0].y, + x, y); + return -EINVAL; + } + } + /* * The fence (if used) is aligned to the start of the object * so having the framebuffer wrap around across the edge of the @@ -2873,6 +2913,9 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb, int plane, break; } break; + case I915_FORMAT_MOD_Y_TILED_CCS: + case I915_FORMAT_MOD_Yf_TILED_CCS: + /* FIXME AUX plane? */ case I915_FORMAT_MOD_Y_TILED: case I915_FORMAT_MOD_Yf_TILED: switch (cpp) { @@ -2895,6 +2938,42 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb, int plane, return 2048; } +static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state, + int main_x, int main_y, u32 main_offset) +{ + const struct drm_framebuffer *fb = plane_state->base.fb; + int aux_x = plane_state->aux.x; + int aux_y = plane_state->aux.y; + u32 aux_offset = plane_state->aux.offset; + u32 alignment = intel_surf_alignment(fb, 1); + + while (aux_offset >= main_offset && aux_y <= main_y) { + int x, y; + + if (aux_x == main_x && aux_y == main_y) + break; + + if (aux_offset == 0) + break; + + x = aux_x / 16; + y = aux_y / 8; + aux_offset = intel_adjust_tile_offset(&x, &y, plane_state, 1, + aux_offset, aux_offset - alignment); + aux_x = x * 16 + aux_x % 16; + aux_y = y * 8 + aux_y % 8; + } + + if (aux_x != main_x || aux_y != main_y) + return false; + + plane_state->aux.offset = aux_offset; + plane_state->aux.x = aux_x; + plane_state->aux.y = aux_y; + + return true; +} + static int skl_check_main_surface(struct intel_plane_state *plane_state) { const struct drm_framebuffer *fb = plane_state->base.fb; @@ -2937,7 +3016,7 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) while ((x + w) * cpp > fb->pitches[0]) { if (offset == 0) { - DRM_DEBUG_KMS("Unable to find suitable display surface offset\n"); + DRM_DEBUG_KMS("Unable to find suitable display surface offset due to X-tiling\n"); return -EINVAL; } @@ -2946,6 +3025,26 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) } } + /* + * CCS AUX surface doesn't have its own x/y offsets, we must make sure + * they match with the main surface x/y offsets. + */ + if (fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS) { + while (!skl_check_main_ccs_coordinates(plane_state, x, y, offset)) { + if (offset == 0) + break; + + offset = intel_adjust_tile_offset(&x, &y, plane_state, 0, + offset, offset - alignment); + } + + if (x != plane_state->aux.x || y != plane_state->aux.y) { + DRM_DEBUG_KMS("Unable to find suitable display surface offset due to CCS\n"); + return -EINVAL; + } + } + plane_state->main.offset = offset; plane_state->main.x = x; plane_state->main.y = y; @@ -2982,6 +3081,53 @@ static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state) return 0; } +static int skl_check_ccs_aux_surface(struct intel_plane_state *plane_state) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct intel_crtc *crtc = to_intel_crtc(plane_state->base.crtc); + int src_x = plane_state->base.src.x1 >> 16; + int src_y = plane_state->base.src.y1 >> 16; + int x = src_x / 16; + int y = src_y / 8; + u32 offset; + + switch (plane->id) { + case PLANE_PRIMARY: + case PLANE_SPRITE0: + break; + default: + DRM_DEBUG_KMS("RC support only on plane 1 and 2\n"); + return -EINVAL; + } + + if (crtc->pipe == PIPE_C) { + DRM_DEBUG_KMS("No RC support on pipe C\n"); + return -EINVAL; + } + /* + * TODO: + * 1. Disable stereo 3D when render decomp is enabled (bit 7:6) + * 2. Render decompression must not be used in VTd pass-through mode + * 3. Program hashing select CHICKEN_MISC1 bit 15 + */ + + if (plane_state->base.rotation && + plane_state->base.rotation & ~(DRM_ROTATE_0 | DRM_ROTATE_180)) { + DRM_DEBUG_KMS("RC support only with 0/180 degree rotation %x\n", + plane_state->base.rotation); + return -EINVAL; + } + + intel_add_fb_offsets(&x, &y, plane_state, 1); + offset = intel_compute_tile_offset(&x, &y, plane_state, 1); + + plane_state->aux.offset = offset; + plane_state->aux.x = x * 16 + src_x % 16; + plane_state->aux.y = y * 8 + src_y % 8; + + return 0; +} + int skl_check_plane_surface(struct intel_plane_state *plane_state) { const struct drm_framebuffer *fb = plane_state->base.fb; @@ -3002,6 +3148,11 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) ret = skl_check_nv12_aux_surface(plane_state); if (ret) return ret; + } else if (fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS) { + ret = skl_check_ccs_aux_surface(plane_state); + if (ret) + return ret; } else { plane_state->aux.offset = ~0xfff; plane_state->aux.x = 0; @@ -3357,8 +3508,12 @@ u32 skl_plane_ctl_tiling(uint64_t fb_modifier) return PLANE_CTL_TILED_X; case I915_FORMAT_MOD_Y_TILED: return PLANE_CTL_TILED_Y; + case I915_FORMAT_MOD_Y_TILED_CCS: + return PLANE_CTL_TILED_Y | PLANE_CTL_DECOMPRESSION_ENABLE; case I915_FORMAT_MOD_Yf_TILED: return PLANE_CTL_TILED_YF; + case I915_FORMAT_MOD_Yf_TILED_CCS: + return PLANE_CTL_TILED_YF | PLANE_CTL_DECOMPRESSION_ENABLE; default: MISSING_CASE(fb_modifier); } @@ -3401,6 +3556,7 @@ static void skylake_update_primary_plane(struct drm_plane *plane, u32 plane_ctl; unsigned int rotation = plane_state->base.rotation; u32 stride = skl_plane_stride(fb, 0, rotation); + u32 aux_stride = skl_plane_stride(fb, 1, rotation); u32 surf_addr = plane_state->main.offset; int scaler_id = plane_state->scaler_id; int src_x = plane_state->main.x; @@ -3436,6 +3592,10 @@ static void skylake_update_primary_plane(struct drm_plane *plane, I915_WRITE(PLANE_OFFSET(pipe, plane_id), (src_y << 16) | src_x); I915_WRITE(PLANE_STRIDE(pipe, plane_id), stride); I915_WRITE(PLANE_SIZE(pipe, plane_id), (src_h << 16) | src_w); + I915_WRITE(PLANE_AUX_DIST(pipe, plane_id), + (plane_state->aux.offset - surf_addr) | aux_stride); + I915_WRITE(PLANE_AUX_OFFSET(pipe, plane_id), + (plane_state->aux.y << 16) | plane_state->aux.x); if (scaler_id >= 0) { uint32_t ps_ctrl = 0; @@ -9807,10 +9967,16 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, fb->modifier = I915_FORMAT_MOD_X_TILED; break; case PLANE_CTL_TILED_Y: - fb->modifier = I915_FORMAT_MOD_Y_TILED; + if (val & PLANE_CTL_DECOMPRESSION_ENABLE) + fb->modifier = I915_FORMAT_MOD_Y_TILED_CCS; + else + fb->modifier = I915_FORMAT_MOD_Y_TILED; break; case PLANE_CTL_TILED_YF: - fb->modifier = I915_FORMAT_MOD_Yf_TILED; + if (val & PLANE_CTL_DECOMPRESSION_ENABLE) + fb->modifier = I915_FORMAT_MOD_Yf_TILED_CCS; + else + fb->modifier = I915_FORMAT_MOD_Yf_TILED;
I'm wondering if this is actually feasible. If we find compression enabled, I'd think we should just disable it.
break; default: MISSING_CASE(tiling); @@ -12014,7 +12180,7 @@ static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, u32 ctl, stride = skl_plane_stride(fb, 0, rotation); ctl = I915_READ(PLANE_CTL(pipe, 0)); - ctl &= ~PLANE_CTL_TILED_MASK; + ctl &= ~(PLANE_CTL_TILED_MASK | PLANE_CTL_DECOMPRESSION_ENABLE); switch (fb->modifier) { case DRM_FORMAT_MOD_NONE: break; @@ -12024,9 +12190,15 @@ static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, case I915_FORMAT_MOD_Y_TILED: ctl |= PLANE_CTL_TILED_Y; break; + case I915_FORMAT_MOD_Y_TILED_CCS: + ctl |= PLANE_CTL_TILED_Y | PLANE_CTL_DECOMPRESSION_ENABLE; + break; case I915_FORMAT_MOD_Yf_TILED: ctl |= PLANE_CTL_TILED_YF; break; + case I915_FORMAT_MOD_Yf_TILED_CCS: + ctl |= PLANE_CTL_TILED_YF | PLANE_CTL_DECOMPRESSION_ENABLE; + break; default: MISSING_CASE(fb->modifier); } @@ -15926,8 +16098,8 @@ static int intel_framebuffer_init(struct drm_device *dev, { struct drm_i915_private *dev_priv = to_i915(dev); unsigned int tiling = i915_gem_object_get_tiling(obj); - int ret; - u32 pitch_limit, stride_alignment; + int ret, i; + u32 pitch_limit; struct drm_format_name_buf format_name; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -15953,6 +16125,19 @@ static int intel_framebuffer_init(struct drm_device *dev, /* Passed in modifier sanity checking. */ switch (mode_cmd->modifier[0]) { + case I915_FORMAT_MOD_Y_TILED_CCS: + case I915_FORMAT_MOD_Yf_TILED_CCS: + switch (mode_cmd->pixel_format) { + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + break; + default: + DRM_DEBUG_KMS("RC supported only with RGB8888 formats\n"); + return -EINVAL; + } + /* fall through */ case I915_FORMAT_MOD_Y_TILED: case I915_FORMAT_MOD_Yf_TILED: if (INTEL_GEN(dev_priv) < 9) { @@ -16061,11 +16246,21 @@ static int intel_framebuffer_init(struct drm_device *dev, drm_helper_mode_fill_fb_struct(dev, &intel_fb->base, mode_cmd); - stride_alignment = intel_fb_stride_alignment(&intel_fb->base, 0); - if (mode_cmd->pitches[0] & (stride_alignment - 1)) { - DRM_DEBUG_KMS("pitch (%d) must be at least %u byte aligned\n", - mode_cmd->pitches[0], stride_alignment); - return -EINVAL; + for (i = 0; i < intel_fb->base.format->num_planes; i++) { + u32 stride_alignment; + + if (mode_cmd->handles[i] != mode_cmd->handles[0]) { + DRM_DEBUG_KMS("bad plane %d handle\n", i); + return -EINVAL; + } + + stride_alignment = intel_fb_stride_alignment(&intel_fb->base, i); + + if (mode_cmd->pitches[i] & (stride_alignment - 1)) { + DRM_DEBUG_KMS("plane %d pitch (%d) must be at least %u byte aligned\n", + i, mode_cmd->pitches[i], stride_alignment); + return -EINVAL; + } } intel_fb->obj = obj; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 249623d45be0..add359022c96 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3314,7 +3314,9 @@ skl_ddb_min_alloc(const struct drm_plane_state *pstate, /* For Non Y-tile return 8-blocks */ if (fb->modifier != I915_FORMAT_MOD_Y_TILED && - fb->modifier != I915_FORMAT_MOD_Yf_TILED) + fb->modifier != I915_FORMAT_MOD_Yf_TILED && + fb->modifier != I915_FORMAT_MOD_Y_TILED_CCS && + fb->modifier != I915_FORMAT_MOD_Yf_TILED_CCS) return 8; src_w = drm_rect_width(&intel_pstate->base.src) >> 16; @@ -3590,7 +3592,9 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, } y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED || - fb->modifier == I915_FORMAT_MOD_Yf_TILED; + fb->modifier == I915_FORMAT_MOD_Yf_TILED || + fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS; x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED; /* Display WA #1141: kbl. */ diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 7031bc733d97..063a994815d0 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -210,6 +210,7 @@ skl_update_plane(struct drm_plane *drm_plane, u32 surf_addr = plane_state->main.offset; unsigned int rotation = plane_state->base.rotation; u32 stride = skl_plane_stride(fb, 0, rotation); + u32 aux_stride = skl_plane_stride(fb, 1, rotation); int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); @@ -248,6 +249,10 @@ skl_update_plane(struct drm_plane *drm_plane, I915_WRITE(PLANE_OFFSET(pipe, plane_id), (y << 16) | x); I915_WRITE(PLANE_STRIDE(pipe, plane_id), stride); I915_WRITE(PLANE_SIZE(pipe, plane_id), (src_h << 16) | src_w); + I915_WRITE(PLANE_AUX_DIST(pipe, plane_id), + (plane_state->aux.offset - surf_addr) | aux_stride); + I915_WRITE(PLANE_AUX_OFFSET(pipe, plane_id), + (plane_state->aux.y << 16) | plane_state->aux.x); /* program plane scaler */ if (plane_state->scaler_id >= 0) {
Looks good. I haven't tested it. Reviewed-by: Ben Widawsky <ben@xxxxxxxxxxxx> _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx