On 06/09/2021 17:55, Thomas Hellström wrote:
Just evict unpinned objects to system. For pinned LMEM objects, make a backup system object and blit the contents to that. Backup is performed in three steps, 1: Opportunistically evict evictable objects using the gpu blitter. 2: After gt idle, evict evictable objects using the gpu blitter. This will be modified in an upcoming patch to backup pinned objects that are not used by the blitter itself. 3: Backup remaining pinned objects using memcpy. Also move uC suspend to after 2) to make sure we have a functional GuC during 2) if using GuC submission. v2: - Major refactor to make sure gem_exec_suspend@hang-SX subtests work, and suspend / resume works with a slightly modified GuC submission enabling patch series. Signed-off-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
<snip>
+ +static int i915_ttm_backup(struct i915_gem_apply_to_region *apply, + struct drm_i915_gem_object *obj) +{ + struct i915_gem_ttm_pm_apply *pm_apply = + container_of(apply, typeof(*pm_apply), base); + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct ttm_buffer_object *backup_bo; + struct drm_i915_private *i915 = + container_of(bo->bdev, typeof(*i915), bdev); + struct intel_memory_region *sys_region; + struct drm_i915_gem_object *backup; + struct ttm_operation_ctx ctx = {}; + int err = 0; + + if (bo->resource->mem_type == I915_PL_SYSTEM || obj->ttm.backup) + return 0; + + if (pm_apply->allow_gpu && i915_gem_object_evictable(obj)) + return ttm_bo_validate(bo, i915_ttm_sys_placement(), &ctx); + + if (!pm_apply->backup_pinned) + return 0; + + sys_region = i915->mm.regions[INTEL_REGION_SMEM]; + backup = i915_gem_object_create_region(sys_region, + obj->base.size, + 0, 0);
create_shmem()?
+ if (IS_ERR(backup)) + return PTR_ERR(backup); + + err = i915_gem_object_lock(backup, apply->ww); + if (err) + goto out_no_lock; + + backup_bo = i915_gem_to_ttm(backup); + err = ttm_tt_populate(backup_bo->bdev, backup_bo->ttm, &ctx); + if (err) + goto out_no_populate; + + err = i915_gem_obj_copy_ttm(backup, obj, pm_apply->allow_gpu, false); + GEM_WARN_ON(err); + + obj->ttm.backup = backup; + return 0; + +out_no_populate: + i915_gem_ww_unlock_single(backup); +out_no_lock: + i915_gem_object_put(backup); + + return err; +} + +static int i915_ttm_recover(struct i915_gem_apply_to_region *apply, + struct drm_i915_gem_object *obj) +{ + i915_ttm_backup_free(obj); + return 0; +} + +/** + * i915_ttm_recover_region - Free the backup of all objects of a region + * @mr: The memory region + * + * Checks all objects of a region if there is backup attached and if so + * frees that backup. Typically this is called to recover after a partially + * performed backup. + */ +void i915_ttm_recover_region(struct intel_memory_region *mr) +{ + static const struct i915_gem_apply_to_region_ops recover_ops = { + .process_obj = i915_ttm_recover, + }; + struct i915_gem_apply_to_region apply = {.ops = &recover_ops}; + int ret; + + ret = i915_gem_process_region(mr, &apply); + GEM_WARN_ON(ret); +} + +/** + * i915_ttm_backup_region - Back up all objects of a region to smem. + * @mr: The memory region + * @allow_gpu: Whether to allow the gpu blitter for this backup. + * @backup_pinned: Backup also pinned objects. + * + * Loops over all objects of a region and either evicts them if they are + * evictable or backs them up using a backup object if they are pinned. + * + * Return: Zero on success. Negative error code on error. + */ +int i915_ttm_backup_region(struct intel_memory_region *mr, bool allow_gpu, + bool backup_pinned) +{ + static const struct i915_gem_apply_to_region_ops backup_ops = { + .process_obj = i915_ttm_backup, + }; + struct i915_gem_ttm_pm_apply pm_apply = { + .base = {.ops = &backup_ops}, + .allow_gpu = allow_gpu, + .backup_pinned = backup_pinned, + }; + + return i915_gem_process_region(mr, &pm_apply.base); +} + +static int i915_ttm_restore(struct i915_gem_apply_to_region *apply, + struct drm_i915_gem_object *obj) +{ + struct i915_gem_ttm_pm_apply *pm_apply = + container_of(apply, typeof(*pm_apply), base); + struct drm_i915_gem_object *backup = obj->ttm.backup; + struct ttm_buffer_object *backup_bo = i915_gem_to_ttm(backup); + struct ttm_operation_ctx ctx = {}; + int err; + + if (!obj->ttm.backup)
if (!backup)
+ return 0; + + if (!pm_apply->allow_gpu && (obj->flags & I915_BO_ALLOC_USER)) + return 0; + + err = i915_gem_object_lock(backup, apply->ww); + if (err) + return err; + + /* Content may have been swapped. */ + err = ttm_tt_populate(backup_bo->bdev, backup_bo->ttm, &ctx); + if (!err) { + err = i915_gem_obj_copy_ttm(obj, backup, pm_apply->allow_gpu, + false); + GEM_WARN_ON(err); + + obj->ttm.backup = NULL; + err = 0; + } + + i915_gem_ww_unlock_single(backup); + i915_gem_object_put(backup);
I assume we need to set ttm.backup = NULL somewhere here on the failure path, or don't drop the ref? Or at least it looks like potential uaf later?
+ + return err; +} +