On 9/6/21 6:55 PM, Thomas Hellström wrote:
Just evict unpinned objects to system. For pinned LMEM objects,
make a backup system object and blit the contents to that.
Backup is performed in three steps,
1: Opportunistically evict evictable objects using the gpu blitter.
2: After gt idle, evict evictable objects using the gpu blitter. This will
be modified in an upcoming patch to backup pinned objects that are not used
by the blitter itself.
3: Backup remaining pinned objects using memcpy.
Also move uC suspend to after 2) to make sure we have a functional GuC
during 2) if using GuC submission.
v2:
- Major refactor to make sure gem_exec_suspend@hang-SX subtests work, and
suspend / resume works with a slightly modified GuC submission enabling
patch series.
Signed-off-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
---
drivers/gpu/drm/i915/Makefile | 1 +
.../gpu/drm/i915/gem/i915_gem_object_types.h | 1 +
drivers/gpu/drm/i915/gem/i915_gem_pm.c | 92 +++++++-
drivers/gpu/drm/i915/gem/i915_gem_pm.h | 3 +-
drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 29 ++-
drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 10 +
drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c | 205 ++++++++++++++++++
drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.h | 24 ++
drivers/gpu/drm/i915/gt/intel_gt_pm.c | 4 +-
drivers/gpu/drm/i915/i915_drv.c | 10 +-
drivers/gpu/drm/i915/i915_drv.h | 2 +-
11 files changed, 364 insertions(+), 17 deletions(-)
create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c
create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.h
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index c36c8a4f0716..3379a0a6c91e 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -155,6 +155,7 @@ gem-y += \
gem/i915_gem_throttle.o \
gem/i915_gem_tiling.o \
gem/i915_gem_ttm.o \
+ gem/i915_gem_ttm_pm.o \
gem/i915_gem_userptr.o \
gem/i915_gem_wait.o \
gem/i915_gemfs.o
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 2471f36aaff3..734cc8e16481 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -534,6 +534,7 @@ struct drm_i915_gem_object {
struct {
struct sg_table *cached_io_st;
struct i915_gem_object_page_iter get_io_page;
+ struct drm_i915_gem_object *backup;
bool created:1;
} ttm;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 8b9d7d14c4bd..9746c255ddcc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -5,6 +5,7 @@
*/
#include "gem/i915_gem_pm.h"
+#include "gem/i915_gem_ttm_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_requests.h"
@@ -39,7 +40,79 @@ void i915_gem_suspend(struct drm_i915_private *i915)
i915_gem_drain_freed_objects(i915);
}
-void i915_gem_suspend_late(struct drm_i915_private *i915)
+static int lmem_restore(struct drm_i915_private *i915, bool allow_gpu)
+{
+ struct intel_memory_region *mr;
+ int ret = 0, id;
+
+ for_each_memory_region(mr, i915, id) {
+ if (mr->type == INTEL_MEMORY_LOCAL) {
+ ret = i915_ttm_restore_region(mr, allow_gpu);
+ if (ret)
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static int lmem_suspend(struct drm_i915_private *i915, bool allow_gpu,
+ bool backup_pinned)
+{
+ struct intel_memory_region *mr;
+ int ret = 0, id;
+
+ for_each_memory_region(mr, i915, id) {
+ if (mr->type == INTEL_MEMORY_LOCAL) {
+ ret = i915_ttm_backup_region(mr, allow_gpu, backup_pinned);
+ if (ret)
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static void lmem_recover(struct drm_i915_private *i915)
+{
+ struct intel_memory_region *mr;
+ int id;
+
+ for_each_memory_region(mr, i915, id)
+ if (mr->type == INTEL_MEMORY_LOCAL)
+ i915_ttm_recover_region(mr);
+}
+
+int i915_gem_backup_suspend(struct drm_i915_private *i915)
+{
+ int ret;
+
+ /* Opportunistically try to evict unpinned objects */
+ ret = lmem_suspend(i915, true, false);
+ if (ret)
+ goto out_recover;
+
+ i915_gem_suspend(i915);
+
+ /*
+ * More objects may have become unpinned as requests were
+ * retired. Now try to evict again. The gt may be wedged here
+ * in which case we automatically fall back to memcpy.
+ */
+
+ ret = lmem_suspend(i915, true, false);
+ if (ret)
+ goto out_recover;
+
+ return 0;
+
+out_recover:
+ lmem_recover(i915);
+
+ return ret;
+}
+
+int i915_gem_suspend_late(struct drm_i915_private *i915)
{
struct drm_i915_gem_object *obj;
struct list_head *phases[] = {
@@ -49,6 +122,13 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
}, **phase;
unsigned long flags;
bool flush = false;
+ int ret;
+
+ ret = lmem_suspend(i915, false, true);
+ if (ret) {
+ lmem_recover(i915);
+ return ret;
+ }
Actually, lmem_suspend() here gets called also at driver unload which is
undesirable. We only want to call it on suspend. However it seems like
it's perfectly fine to move this to just after the previous lmem_suspend
call, since we're no longer using the migrate context.
/Thomas