On i915, GuC SLPC can only really control the frequency if waitboost is disabled. This is therefore a hacky, experimental-only change intended for debugging the available metrics with i915. Cc: Jani Nikula <jani.nikula@xxxxxxxxx> Cc: Lucas De Marchi <lucas.demarchi@xxxxxxxxx> Cc: Vinay Belgaumkar <vinay.belgaumkar@xxxxxxxxx> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> --- drivers/gpu/drm/i915/gt/intel_rps.c | 9 + .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 59 ++++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 271 +++++++++++++++++- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 6 + .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 4 + drivers/gpu/drm/i915/i915_params.c | 2 +- drivers/gpu/drm/i915/i915_params.h | 4 +- 7 files changed, 345 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 4feef874e6d6..20fe9d6c1953 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1010,6 +1010,15 @@ void intel_rps_boost(struct i915_request *rq) { struct intel_guc_slpc *slpc; + /* + * XXX: For now, skip the boost itself, but later the full machinery + * of the waitboost including some variable handles and extra locks + * could be avoided.
+ */ + if (rq->i915->params.enable_guc & ENABLE_GUC_SLPC_VBLANK || + rq->i915->params.enable_guc & ENABLE_GUC_SLPC_FLIP) + return; + if (i915_request_signaled(rq) || i915_request_has_waitboost(rq)) return; diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h index 811add10c30d..8a2fdfd20c0d 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h @@ -122,6 +122,11 @@ enum slpc_param_id { SLPC_MAX_PARAM = 32, }; +#define SLPC_OPTIMIZED_STRATEGIES_COMPUTE REG_BIT(0) +#define SLPC_OPTIMIZED_STRATEGIES_ASYNC_FLIP REG_BIT(1) +#define SLPC_OPTIMIZED_STRATEGIES_MEDIA REG_BIT(2) +#define SLPC_OPTIMIZED_STRATEGIES_VSYNC_FLIP REG_BIT(3) + enum slpc_media_ratio_mode { SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL = 0, SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE = 1, @@ -207,6 +212,60 @@ struct slpc_shared_data { u8 reserved_mode_definition[4096]; } __packed; +#define SLPC_MAX_PIPES 8 +#define SLPC_MAX_PLANES_PER_PIPE 8 + +struct slpc_display_global_info { + u32 version:8; + u32 num_pipes:4; + u32 num_planes_per_pipe:4; + u32 reserved_1:16; + u32 refresh_count:16; + u32 vblank_count:16; + u32 flip_count:16; + u32 reserved_2:16; + u32 reserved_3[13]; +} __packed; + +struct slpc_display_refresh_info { + u32 refresh_interval:16; + u32 is_variable:1; + u32 reserved:15; +} __packed; + +/* + * The host must update each 32-bit part with a single atomic write so + * that SLPC will read the contained bit fields together. The host must + * update the two parts in order - total flip count and timestamp first, + * vsync and async flip counts second. + * Hence, these items are not defined with individual bitfields. 
+ */ +#define SLPC_FLIP_P1_LAST REG_GENMASK(31, 7) +#define SLPC_FLIP_P1_TOTAL_COUNT REG_GENMASK(6, 0) +#define SLPC_FLIP_P2_ASYNC_COUNT REG_GENMASK(31, 16) +#define SLPC_FLIP_P2_VSYNC_COUNT REG_GENMASK(15, 0) + +struct slpc_display_flip_metrics { + u32 part1; + u32 part2; +} __packed; + +/* + * The host must update this 32-bit structure with a single atomic write + * so that SLPC will read the count and timestamp together. + * Hence, this item is not defined with individual bitfields. + */ +#define SLPC_VBLANK_LAST REG_GENMASK(31, 7) +#define SLPC_VBLANK_COUNT REG_GENMASK(6, 0) + +struct slpc_display_data { + struct slpc_display_global_info global_info; + struct slpc_display_refresh_info refresh_info[SLPC_MAX_PIPES]; + u32 vblank_metrics[SLPC_MAX_PIPES]; + struct slpc_display_flip_metrics + flip_metrics[SLPC_MAX_PIPES][SLPC_MAX_PLANES_PER_PIPE]; +} __packed; + /** * DOC: SLPC H2G MESSAGE FORMAT * diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 3e681ab6fbf9..911f54e8fa19 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -8,6 +8,8 @@ #include "i915_drv.h" #include "i915_reg.h" +#include "display/intel_display_guc_metrics.h" +#include "display/intel_display_guc_metrics_types.h" #include "intel_guc_slpc.h" #include "intel_guc_print.h" #include "intel_mchbar_regs.h" @@ -241,20 +243,179 @@ static void slpc_boost_work(struct work_struct *work) mutex_unlock(&slpc->lock); } +static void slpc_display_data_init(struct slpc_display_data *data, int version, + int num_pipes, int num_planes_per_pipe) +{ + data->global_info.version = version; + data->global_info.num_pipes = num_pipes; + data->global_info.num_planes_per_pipe = num_planes_per_pipe; +} + +static void slpc_refresh_info(struct slpc_display_data *data, int pipe, + u32 refresh_interval, bool vrr_enabled) +{ + data->refresh_info[pipe].refresh_interval = refresh_interval; + 
data->refresh_info[pipe].is_variable = vrr_enabled; + data->global_info.refresh_count += 1; +} + +static void slpc_vblank(struct slpc_display_data *data, int pipe, + u32 timestamp) +{ + u32 vblank; + + vblank = data->vblank_metrics[pipe]; + + vblank = REG_FIELD_GET(SLPC_VBLANK_COUNT, vblank); + vblank = REG_FIELD_PREP(SLPC_VBLANK_COUNT, vblank + 1); + vblank |= REG_FIELD_PREP(SLPC_VBLANK_LAST, timestamp); + + data->vblank_metrics[pipe] = vblank; + data->global_info.vblank_count += 1; +} + +static void slpc_flip(struct slpc_display_data *data, int pipe, int plane, + bool async_flip, u32 timestamp) +{ + u32 part1, part2, count; + + part1 = data->flip_metrics[pipe][plane].part1; + part2 = data->flip_metrics[pipe][plane].part2; + + part1 = REG_FIELD_GET(SLPC_FLIP_P1_TOTAL_COUNT, part1); + part1 = REG_FIELD_PREP(SLPC_FLIP_P1_TOTAL_COUNT, part1 + 1); + part1 |= REG_FIELD_PREP(SLPC_FLIP_P1_LAST, timestamp); + + if (async_flip) { + count = REG_FIELD_GET(SLPC_FLIP_P2_ASYNC_COUNT, part2); + part2 &= ~SLPC_FLIP_P2_ASYNC_COUNT; + part2 |= REG_FIELD_PREP(SLPC_FLIP_P2_ASYNC_COUNT, count + 1); + } else { + count = REG_FIELD_GET(SLPC_FLIP_P2_VSYNC_COUNT, part2); + part2 &= ~SLPC_FLIP_P2_VSYNC_COUNT; + part2 |= REG_FIELD_PREP(SLPC_FLIP_P2_VSYNC_COUNT, count + 1); + } + + data->flip_metrics[pipe][plane].part1 = part1; + data->flip_metrics[pipe][plane].part2 = part2; + + data->global_info.flip_count += 1; +} + +static void intel_guc_slpc_refresh_info_update(void *gfx_device, int pipe, + u32 refresh_interval, + bool vrr_enabled) +{ + struct drm_i915_private *i915 = gfx_device; + struct intel_gt *gt; + int i; + + if (pipe >= SLPC_MAX_PIPES) { + drm_err(&i915->drm, "GuC PC Max display pipe exceeded\n"); + return; + } + + for_each_gt(gt, i915, i) + slpc_refresh_info(gt->uc.guc.slpc.display.vaddr, pipe, + refresh_interval, vrr_enabled); +} + +#define MCHBAR_BCLK_COUNT _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5984) +#define MTL_BCLK_COUNT _MMIO(0xc28) +#define TIMESTAMP_MASK REG_GENMASK(30, 6) +
+static u32 bclk_read_timestamp(struct intel_gt *gt) +{ + u32 timestamp; + + if (IS_METEORLAKE(gt->i915)) + timestamp = intel_uncore_read_fw(gt->uncore, MTL_BCLK_COUNT); + else + timestamp = intel_uncore_read_fw(gt->uncore, MCHBAR_BCLK_COUNT); + + return REG_FIELD_GET(TIMESTAMP_MASK, timestamp); +} + +static void intel_guc_slpc_vblank_update(void *gfx_device, int pipe) +{ + struct drm_i915_private *i915 = gfx_device; + struct intel_gt *gt; + u32 timestamp; + int i; + + if (!(i915->params.enable_guc & ENABLE_GUC_SLPC_VBLANK)) + return; + + if (pipe >= SLPC_MAX_PIPES) { + drm_err(&i915->drm, "GuC PC Max display pipe exceeded\n"); + return; + } + + for_each_gt(gt, i915, i) { + timestamp = bclk_read_timestamp(gt); + slpc_vblank(gt->uc.guc.slpc.display.vaddr, pipe, timestamp); + } +} + +static void intel_guc_slpc_flip_update(void *gfx_device, int pipe, int plane, + bool async_flip) +{ + struct drm_i915_private *i915 = gfx_device; + struct intel_gt *gt; + u32 timestamp; + int i; + + if (!(i915->params.enable_guc & ENABLE_GUC_SLPC_FLIP)) + return; + + if (pipe >= SLPC_MAX_PIPES) { + drm_err(&i915->drm, "GuC PC Max display pipe exceeded\n"); + return; + } + + if (plane >= SLPC_MAX_PLANES_PER_PIPE) { + drm_err(&i915->drm, "GuC PC Max display planes exceeded\n"); + return; + } + + for_each_gt(gt, i915, i) { + timestamp = bclk_read_timestamp(gt); + slpc_flip(gt->uc.guc.slpc.display.vaddr, pipe, plane, + async_flip, timestamp); + } +} + +static struct intel_display_guc_metrics guc_metrics = { + .refresh_info_update = intel_guc_slpc_refresh_info_update, + .vblank_update = intel_guc_slpc_vblank_update, + .flip_update = intel_guc_slpc_flip_update, +}; + int intel_guc_slpc_init(struct intel_guc_slpc *slpc) { struct intel_guc *guc = slpc_to_guc(slpc); - u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); + struct drm_i915_private *i915 = guc_to_i915(guc); + u32 size; int err; GEM_BUG_ON(slpc->vma); + size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); err =
intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr); if (unlikely(err)) { guc_probe_error(guc, "Failed to allocate SLPC struct: %pe\n", ERR_PTR(err)); return err; } + size = PAGE_ALIGN(sizeof(struct slpc_display_data)); + err = intel_guc_allocate_and_map_vma(guc, size, &slpc->display.vma, + (void **)&slpc->display.vaddr); + if (unlikely(err)) { + guc_probe_error(guc, "Failed to allocate SLPC's display struct: %pe\n", + ERR_PTR(err)); + return err; + } + slpc->max_freq_softlimit = 0; slpc->min_freq_softlimit = 0; slpc->ignore_eff_freq = false; @@ -265,6 +426,10 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc) slpc->num_boosts = 0; slpc->media_ratio_mode = SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL; + if (i915->params.enable_guc & ENABLE_GUC_SLPC_VBLANK || + i915->params.enable_guc & ENABLE_GUC_SLPC_FLIP) + intel_display_guc_metrics_init(i915, &i915->display, &guc_metrics); + mutex_init(&slpc->lock); INIT_WORK(&slpc->boost_work, slpc_boost_work); @@ -357,12 +522,28 @@ static u32 slpc_decode_max_freq(struct intel_guc_slpc *slpc) GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER); } -static void slpc_shared_data_reset(struct slpc_shared_data *data) +static void slpc_shared_display_data_reset(struct intel_guc_slpc *slpc) { - memset(data, 0, sizeof(struct slpc_shared_data)); + struct slpc_shared_data *data = slpc->vaddr; + struct slpc_display_data *display_data = slpc->display.vaddr; + + memset(display_data, 0, sizeof(struct slpc_display_data)); + + slpc_display_data_init(slpc->display.vaddr, 1, SLPC_MAX_PIPES, + SLPC_MAX_PLANES_PER_PIPE); + data->header.display_data_addr = intel_guc_ggtt_offset(slpc_to_guc(slpc), + slpc->display.vma); +} + +static void slpc_shared_data_reset(struct intel_guc_slpc *slpc) +{ + struct slpc_shared_data *data = slpc->vaddr; + memset(data, 0, sizeof(struct slpc_shared_data)); data->header.size = sizeof(struct slpc_shared_data); + slpc_shared_display_data_reset(slpc); + /* Enable only GTPERF task, disable others */ 
slpc_mem_set_enabled(data, SLPC_PARAM_TASK_ENABLE_GTPERF, SLPC_PARAM_TASK_DISABLE_GTPERF); @@ -672,7 +853,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) GEM_BUG_ON(!slpc->vma); - slpc_shared_data_reset(slpc->vaddr); + slpc_shared_data_reset(slpc); ret = slpc_reset(slpc); if (unlikely(ret < 0)) { @@ -711,6 +892,10 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) /* Set cached media freq ratio mode */ intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode); + slpc_set_param_nb(slpc, SLPC_PARAM_STRATEGIES, + SLPC_OPTIMIZED_STRATEGIES_VSYNC_FLIP | + SLPC_OPTIMIZED_STRATEGIES_ASYNC_FLIP); + return 0; } @@ -755,6 +940,67 @@ void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc) mutex_unlock(&slpc->lock); } +static void slpc_print_display_metrics(struct drm_printer *p, + struct slpc_display_data *data) +{ + int pipe, plane; + u32 val; + + drm_printf(p, "\nSLPC Display Data - Globals:\n"); + drm_printf(p, "\tVersion: %d\n", data->global_info.version); + drm_printf(p, "\tNum Pipes: %d\n", data->global_info.num_pipes); + drm_printf(p, "\tNum Planes per Pipe: %d\n", + data->global_info.num_planes_per_pipe); + drm_printf(p, "\tGlobal Refresh Info Count: %d\n", + data->global_info.refresh_count); + drm_printf(p, "\tGlobal Vblank Count: %d\n", + data->global_info.vblank_count); + drm_printf(p, "\tGlobal Flip Count: %d\n", + data->global_info.flip_count); + + for (pipe = 0; pipe < SLPC_MAX_PIPES; pipe++) { + + if (!data->refresh_info[pipe].refresh_interval) + continue; + + drm_printf(p, "\nSLPC Display Data - Refresh Info - Pipe[%d]:\n", + pipe); + drm_printf(p, "\tRefresh Interval: %d\n", + data->refresh_info[pipe].refresh_interval); + drm_printf(p, "\tIS VRR: %d\n", + data->refresh_info[pipe].is_variable); + + drm_printf(p, "SLPC Display Data - Vblank Info - Pipe[%d]:\n", + pipe); + val = data->vblank_metrics[pipe]; + drm_printf(p, "\tVBlank Last Timestamp: %x\n", + REG_FIELD_GET(SLPC_VBLANK_LAST, val)); + drm_printf(p, "\tVBlank Count: 
%d\n", + REG_FIELD_GET(SLPC_VBLANK_COUNT, val)); + + drm_printf(p, "SLPC Display Data - Flip Info - Pipe[%d]:\n", pipe); + for (plane = 0; plane < SLPC_MAX_PLANES_PER_PIPE; plane++) { + + val = data->flip_metrics[pipe][plane].part1; + if (!val) + continue; + + drm_printf(p, "\tFlip Info - Plane[%d]:\n", plane); + drm_printf(p, "\t\tFlip Last Timestamp: %x\n", + REG_FIELD_GET(SLPC_FLIP_P1_LAST, val)); + drm_printf(p, "\t\tFlip Total Count: %d\n", + REG_FIELD_GET(SLPC_FLIP_P1_TOTAL_COUNT, val)); + + val = data->flip_metrics[pipe][plane].part2; + + drm_printf(p, "\t\tFlip Async Count: %d\n", + REG_FIELD_GET(SLPC_FLIP_P2_ASYNC_COUNT, val)); + drm_printf(p, "\t\tFlip Vsync Count: %d\n", + REG_FIELD_GET(SLPC_FLIP_P2_VSYNC_COUNT, val)); + } + } +} + int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p) { struct drm_i915_private *i915 = slpc_to_i915(slpc); @@ -778,10 +1024,18 @@ int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p slpc_decode_max_freq(slpc)); drm_printf(p, "\tMin freq: %u MHz\n", slpc_decode_min_freq(slpc)); - drm_printf(p, "\twaitboosts: %u\n", - slpc->num_boosts); - drm_printf(p, "\tBoosts outstanding: %u\n", - atomic_read(&slpc->num_waiters)); + + if (i915->params.enable_guc & ENABLE_GUC_SLPC_VBLANK || + i915->params.enable_guc & ENABLE_GUC_SLPC_FLIP) { + if (data->header.display_data_addr) + slpc_print_display_metrics(p, + slpc->display.vaddr); + } else { + drm_printf(p, "\twaitboosts: %u\n", + slpc->num_boosts); + drm_printf(p, "\tBoosts outstanding: %u\n", + atomic_read(&slpc->num_waiters)); + } } } @@ -793,5 +1047,6 @@ void intel_guc_slpc_fini(struct intel_guc_slpc *slpc) if (!slpc->vma) return; + i915_vma_unpin_and_release(&slpc->display.vma, I915_VMA_RELEASE_MAP); i915_vma_unpin_and_release(&slpc->vma, I915_VMA_RELEASE_MAP); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index 6ac6503c39d4..71f33e4aceaf 100644 --- 
a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -45,5 +45,11 @@ void intel_guc_pm_intrmsk_enable(struct intel_gt *gt); void intel_guc_slpc_boost(struct intel_guc_slpc *slpc); void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc); int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val); +void intel_guc_slpc_display_init(struct intel_guc_slpc *slpc); +void intel_guc_slpc_refresh_info(struct intel_guc_slpc *slpc, int pipe, + u32 refresh_info, bool vrr_enabled); +void intel_guc_slpc_vblank(struct intel_guc_slpc *slpc, int pipe, u64 count); +void intel_guc_slpc_flip(struct intel_guc_slpc *slpc, int pipe, int plane, + bool vrr_enabled); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h index a88651331497..edd944d3be8a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ -16,6 +16,10 @@ struct intel_guc_slpc { struct i915_vma *vma; struct slpc_shared_data *vaddr; + struct { + struct i915_vma *vma; + struct slpc_display_data *vaddr; + } display; bool supported; bool selected; diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index de43048543e8..f86968feabc8 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -96,7 +96,7 @@ i915_param_named(mmio_debug, int, 0400, i915_param_named_unsafe(enable_guc, int, 0400, "Enable GuC load for GuC submission and/or HuC load. " "Required functionality can be selected using bitmask values. " - "(-1=auto [default], 0=disable, 1=GuC submission, 2=HuC load)"); + "(-1=auto [default], 0=disable, 1=GuC submission, 2=HuC load, 7=GuC SLPC w/VBlank-info, 11=GuC SLPC w/ Flip-info, 15=GuC SLPC Full Display Metrics (no waitboost))"); i915_param_named(guc_log_level, int, 0400, "GuC firmware logging level. Requires GuC to be loaded. 
" diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 1315d7fac850..1b19a91f2b4d 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -32,7 +32,9 @@ struct drm_printer; #define ENABLE_GUC_SUBMISSION BIT(0) #define ENABLE_GUC_LOAD_HUC BIT(1) -#define ENABLE_GUC_MASK GENMASK(1, 0) +#define ENABLE_GUC_SLPC_VBLANK BIT(2) +#define ENABLE_GUC_SLPC_FLIP BIT(3) +#define ENABLE_GUC_MASK GENMASK(3, 0) /* * Invoke param, a function-like macro, for each i915 param, with arguments: -- 2.43.2