Re: [PATCH 1/4] drm/ttm: add a pointer to the allocating BO into ttm_resource

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi, Christian,

I know you have a lot on your plate, and that the drm community is a bit lax about following the kernel patch submitting guidelines, but now that we're also spinning up a number of Intel developers on TTM, could we please make a better effort with cover letters and commit messages so that they understand what the purpose and end goal of the series is. A reviewer shouldn't have to look at the last patch to try to get an understanding of what the series is doing and why.

On 6/10/21 1:05 PM, Christian König wrote:
We are going to need this for the next patch


and it allows us to clean
up amdgpu as well.

The amdgpu changes are not reflected in the commit title.



Signed-off-by: Christian König <christian.koenig@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 47 ++++++++-------------
  drivers/gpu/drm/ttm/ttm_resource.c          |  1 +
  include/drm/ttm/ttm_resource.h              |  1 +
  3 files changed, 19 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 194f9eecf89c..8e3f5da44e4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -26,23 +26,12 @@
#include "amdgpu.h" -struct amdgpu_gtt_node {
-	struct ttm_buffer_object *tbo;
-	struct ttm_range_mgr_node base;
-};
-
  static inline struct amdgpu_gtt_mgr *
  to_gtt_mgr(struct ttm_resource_manager *man)
  {
  	return container_of(man, struct amdgpu_gtt_mgr, manager);
  }
-static inline struct amdgpu_gtt_node *
-to_amdgpu_gtt_node(struct ttm_resource *res)
-{
-	return container_of(res, struct amdgpu_gtt_node, base.base);
-}
-
  /**
   * DOC: mem_info_gtt_total
   *
@@ -107,9 +96,9 @@ const struct attribute_group amdgpu_gtt_mgr_attr_group = {
   */
  bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *res)
  {
-	struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res);
+	struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
- return drm_mm_node_allocated(&node->base.mm_nodes[0]);
+	return drm_mm_node_allocated(&node->mm_nodes[0]);
  }
/**
@@ -129,7 +118,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
  {
  	struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
  	uint32_t num_pages = PFN_UP(tbo->base.size);
-	struct amdgpu_gtt_node *node;
+	struct ttm_range_mgr_node *node;
  	int r;
spin_lock(&mgr->lock);
@@ -141,19 +130,17 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
  	atomic64_sub(num_pages, &mgr->available);
  	spin_unlock(&mgr->lock);
- node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL);
+	node = kzalloc(struct_size(node, mm_nodes, 1), GFP_KERNEL);
  	if (!node) {
  		r = -ENOMEM;
  		goto err_out;
  	}
- node->tbo = tbo;
-	ttm_resource_init(tbo, place, &node->base.base);
-
+	ttm_resource_init(tbo, place, &node->base);
  	if (place->lpfn) {
  		spin_lock(&mgr->lock);
  		r = drm_mm_insert_node_in_range(&mgr->mm,
-						&node->base.mm_nodes[0],
+						&node->mm_nodes[0],
  						num_pages, tbo->page_alignment,
  						0, place->fpfn, place->lpfn,
  						DRM_MM_INSERT_BEST);
@@ -161,14 +148,14 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
  		if (unlikely(r))
  			goto err_free;
- node->base.base.start = node->base.mm_nodes[0].start;
+		node->base.start = node->mm_nodes[0].start;
  	} else {
-		node->base.mm_nodes[0].start = 0;
-		node->base.mm_nodes[0].size = node->base.base.num_pages;
-		node->base.base.start = AMDGPU_BO_INVALID_OFFSET;
+		node->mm_nodes[0].start = 0;
+		node->mm_nodes[0].size = node->base.num_pages;
+		node->base.start = AMDGPU_BO_INVALID_OFFSET;
  	}
- *res = &node->base.base;
+	*res = &node->base;
  	return 0;
err_free:
@@ -191,12 +178,12 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
  static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man,
  			       struct ttm_resource *res)
  {
-	struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res);
+	struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
  	struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
spin_lock(&mgr->lock);
-	if (drm_mm_node_allocated(&node->base.mm_nodes[0]))
-		drm_mm_remove_node(&node->base.mm_nodes[0]);
+	if (drm_mm_node_allocated(&node->mm_nodes[0]))
+		drm_mm_remove_node(&node->mm_nodes[0]);
  	spin_unlock(&mgr->lock);
  	atomic64_add(res->num_pages, &mgr->available);
@@ -228,14 +215,14 @@ uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man)
  int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
  {
  	struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
-	struct amdgpu_gtt_node *node;
+	struct ttm_range_mgr_node *node;
  	struct drm_mm_node *mm_node;
  	int r = 0;
spin_lock(&mgr->lock);
  	drm_mm_for_each_node(mm_node, &mgr->mm) {
-		node = container_of(mm_node, typeof(*node), base.mm_nodes[0]);
-		r = amdgpu_ttm_recover_gart(node->tbo);
+		node = container_of(mm_node, typeof(*node), mm_nodes[0]);
+		r = amdgpu_ttm_recover_gart(node->base.bo);
  		if (r)
  			break;
  	}
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 2431717376e7..7ff6194154fe 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -41,6 +41,7 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
  	res->bus.offset = 0;
  	res->bus.is_iomem = false;
  	res->bus.caching = ttm_cached;
+	res->bo = bo;
  }
  EXPORT_SYMBOL(ttm_resource_init);
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index 140b6b9a8bbe..6d0b7a6d2169 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -171,6 +171,7 @@ struct ttm_resource {
  	uint32_t mem_type;
  	uint32_t placement;
  	struct ttm_bus_placement bus;
+	struct ttm_buffer_object *bo;

Not that I'm against this change by itself, but this bo pointer is not refcounted, and therefore needs a description of when it's needed and why. What happens, for example, when the resource is moved to a ghost object, or the bo is killed while the resource remains on an lru list (which I understand was one of the main purposes of free-standing resources)? Weak references need a guarantee that the object they point to is alive. What is that guarantee?

Also, could we introduce new TTM structure members where they are first used/referenced by TTM, and not where they are used by amdgpu? Without finding out in patch 3 that this member is needed to look up the bo from an lru list, the correct response to this patch would have been: that bo is amdgpu-specific and needs to be in a driver-private struct.


Thanks,

/Thomas


  };
/**



[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux