Part of what xe_bo_restore_kernel does is restore the BOs' GGTT mappings, which may have been lost during a power state change. What is missing is restoring the GGTT entries that have no BO mapping to a known state (e.g., scratch pages). Update xe_bo_restore_kernel to clear the unallocated GGTT ranges (holes) to a known state before restoring the BOs' GGTT mappings. v2: - Include missing local change of tile and id variable (CI) v3: - Fixed kernel doc (CI) v4: - Only clear holes (CI) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Lucas De Marchi <lucas.demarchi@xxxxxxxxx> Cc: Matthew Auld <matthew.auld@xxxxxxxxx> Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx> Cc: <stable@xxxxxxxxxxxxxxx> # v6.8+ --- drivers/gpu/drm/xe/xe_bo_evict.c | 8 +++++++- drivers/gpu/drm/xe/xe_ggtt.c | 19 ++++++++++++++++--- drivers/gpu/drm/xe/xe_ggtt.h | 2 ++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 8fb2be061003..d7bb3dbb41d6 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -123,7 +123,8 @@ int xe_bo_evict_all(struct xe_device *xe) * @xe: xe device * * Move kernel BOs from temporary (typically system) memory to VRAM via CPU. All - * moves done via TTM calls. + * moves done via TTM calls. All GGTT are restored too, first by clearing GGTT + * to known state and then restoring individual BO's GGTT mappings. * * This function should be called early, before trying to init the GT, on device * resume. 
@@ -131,8 +132,13 @@ int xe_bo_evict_all(struct xe_device *xe) int xe_bo_restore_kernel(struct xe_device *xe) { struct xe_bo *bo; + struct xe_tile *tile; + u8 id; int ret; + for_each_tile(tile, xe, id) + xe_ggtt_clear(tile->mem.ggtt); + spin_lock(&xe->pinned.lock); for (;;) { bo = list_first_entry_or_null(&xe->pinned.evicted, diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 558fac8bb6fb..2fc498b89878 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -140,7 +140,7 @@ static void xe_ggtt_set_pte_and_flush(struct xe_ggtt *ggtt, u64 addr, u64 pte) ggtt_update_access_counter(ggtt); } -static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) +static void __xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) { u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; u64 end = start + size - 1; @@ -160,6 +160,19 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) } } +static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt); + +/** + * xe_ggtt_clear() - GGTT clear + * @ggtt: the &xe_ggtt to be cleared + * + * Clear all GGTT to a known state + */ +void xe_ggtt_clear(struct xe_ggtt *ggtt) +{ + xe_ggtt_initial_clear(ggtt); +} + static void ggtt_fini_early(struct drm_device *drm, void *arg) { struct xe_ggtt *ggtt = arg; @@ -277,7 +290,7 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) /* Display may have allocated inside ggtt, so be careful with clearing here */ mutex_lock(&ggtt->lock); drm_mm_for_each_hole(hole, &ggtt->mm, start, end) - xe_ggtt_clear(ggtt, start, end - start); + __xe_ggtt_clear(ggtt, start, end - start); xe_ggtt_invalidate(ggtt); mutex_unlock(&ggtt->lock); @@ -294,7 +307,7 @@ static void ggtt_node_remove(struct xe_ggtt_node *node) mutex_lock(&ggtt->lock); if (bound) - xe_ggtt_clear(ggtt, node->base.start, node->base.size); + __xe_ggtt_clear(ggtt, node->base.start, node->base.size); drm_mm_remove_node(&node->base); node->base.size = 0; 
mutex_unlock(&ggtt->lock); diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 27e7d67de004..b7ae440cdebf 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -13,6 +13,8 @@ struct drm_printer; int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); +void xe_ggtt_clear(struct xe_ggtt *ggtt); + struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt); void xe_ggtt_node_fini(struct xe_ggtt_node *node); int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, -- 2.34.1