Am 03.02.23 um 20:08 schrieb Shashank Sharma:
From: Alex Deucher <alexander.deucher@xxxxxxx>
This patch adds changes to accommodate the new GEM/TTM domain
for doorbell memory.
Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c | 19 ++++++++++------
drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 3 ++-
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 24 ++++++++++++++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 17 ++++++++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 ++-
7 files changed, 58 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e3e2e6e3b485..e1c1a360614e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -974,6 +974,7 @@ struct amdgpu_device {
atomic64_t vram_pin_size;
atomic64_t visible_pin_size;
atomic64_t gart_pin_size;
+ atomic64_t doorbell_pin_size;
Please drop that, the amount of pinned doorbells is not needed as far as
I can see.
/* soc15 register offset based on ip, instance and segment */
uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c
index 0656e5bb4f05..43a3137019b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c
@@ -659,15 +659,17 @@ static void amdgpu_bar_mgr_del(struct ttm_resource_manager *man,
* @dev: the other device
* @dir: dma direction
* @sgt: resulting sg table
+ * @mem_type: memory type
*
* Allocate and fill a sg table from a VRAM allocation.
*/
int amdgpu_bar_mgr_alloc_sgt(struct amdgpu_device *adev,
- struct ttm_resource *res,
- u64 offset, u64 length,
- struct device *dev,
- enum dma_data_direction dir,
- struct sg_table **sgt)
+ struct ttm_resource *res,
+ u64 offset, u64 length,
+ struct device *dev,
+ enum dma_data_direction dir,
+ struct sg_table **sgt,
+ u32 mem_type)
And again that doesn't make any sense at all.
For now we don't want to export doorbells through DMA-buf.
{
struct amdgpu_res_cursor cursor;
struct scatterlist *sg;
@@ -701,10 +703,15 @@ int amdgpu_bar_mgr_alloc_sgt(struct amdgpu_device *adev,
*/
amdgpu_res_first(res, offset, length, &cursor);
for_each_sgtable_sg((*sgt), sg, i) {
- phys_addr_t phys = cursor.start + adev->gmc.vram_aper_base;
+ phys_addr_t phys = cursor.start;
size_t size = cursor.size;
dma_addr_t addr;
+ if (mem_type == TTM_PL_VRAM)
+ phys += adev->gmc.vram_aper_base;
+ else
+ phys += adev->gmc.doorbell_aper_base;
+
addr = dma_map_resource(dev, phys, size, dir,
DMA_ATTR_SKIP_CPU_SYNC);
r = dma_mapping_error(dev, addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index c48ccde281c3..c645bdc49f34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -179,9 +179,10 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
break;
case TTM_PL_VRAM:
+ case AMDGPU_PL_DOORBELL:
r = amdgpu_bar_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
bo->tbo.base.size, attach->dev,
- dir, &sgt);
+ dir, &sgt, bo->tbo.resource->mem_type);
if (r)
return ERR_PTR(r);
break;
That stuff can be dropped as well as far as I can see.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 887fc53a7d16..b2cfd46c459b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -147,6 +147,18 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
c++;
}
+ if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = AMDGPU_PL_DOORBELL;
+ places[c].flags = 0;
+ places[c].flags |= TTM_PL_FLAG_TOPDOWN;
+
+ if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+ places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
+ c++;
+ }
+
if (domain & AMDGPU_GEM_DOMAIN_GTT) {
places[c].fpfn = 0;
places[c].lpfn = 0;
@@ -464,6 +476,13 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
if (man && size < man->size)
return true;
goto fail;
+ } else if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) {
+ man = ttm_manager_type(&adev->mman.bdev, AMDGPU_PL_DOORBELL);
+
+ if (size < man->size)
+ return true;
+ else
+ goto fail;
Do we ever want userspace to allocate more than one doorbell page at a time?
}
/* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */
@@ -962,8 +981,9 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
&adev->visible_pin_size);
} else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
atomic64_add(amdgpu_bo_size(bo), &adev->gart_pin_size);
+ } else if (domain == AMDGPU_GEM_DOMAIN_DOORBELL) {
+ atomic64_add(amdgpu_bo_size(bo), &adev->doorbell_pin_size);
Can be dropped.
}
-
error:
return r;
}
@@ -1013,6 +1033,8 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo)
&adev->visible_pin_size);
} else if (bo->tbo.resource->mem_type == TTM_PL_TT) {
atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size);
+ } else if (bo->tbo.resource->mem_type == AMDGPU_PL_DOORBELL) {
+ atomic64_sub(amdgpu_bo_size(bo), &adev->doorbell_pin_size);
Dito.
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 93207badf83f..082f451d26f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -326,7 +326,7 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr);
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);
void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem,
- uint64_t *gtt_mem, uint64_t *cpu_mem);
+ uint64_t *gtt_mem, uint64_t *cpu_mem);
void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo);
int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
struct dma_fence **fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index bb2230d14ea6..71eff2f195a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -128,6 +128,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
case AMDGPU_PL_GDS:
case AMDGPU_PL_GWS:
case AMDGPU_PL_OA:
+ case AMDGPU_PL_DOORBELL:
placement->num_placement = 0;
placement->num_busy_placement = 0;
return;
@@ -500,9 +501,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
if (old_mem->mem_type == AMDGPU_PL_GDS ||
old_mem->mem_type == AMDGPU_PL_GWS ||
old_mem->mem_type == AMDGPU_PL_OA ||
+ old_mem->mem_type == AMDGPU_PL_DOORBELL ||
new_mem->mem_type == AMDGPU_PL_GDS ||
new_mem->mem_type == AMDGPU_PL_GWS ||
- new_mem->mem_type == AMDGPU_PL_OA) {
+ new_mem->mem_type == AMDGPU_PL_OA ||
+ new_mem->mem_type == AMDGPU_PL_DOORBELL) {
/* Nothing to save here */
ttm_bo_move_null(bo, new_mem);
goto out;
@@ -586,6 +589,17 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
mem->bus.offset += adev->gmc.vram_aper_base;
mem->bus.is_iomem = true;
break;
+ case AMDGPU_PL_DOORBELL:
+ mem->bus.offset = mem->start << PAGE_SHIFT;
That here won't work if we ever allow allocating more than one page for
a doorbell.
+
+ if (adev->mman.doorbell_aper_base_kaddr &&
+ mem->placement & TTM_PL_FLAG_CONTIGUOUS)
+ mem->bus.addr = (u8 *)adev->mman.doorbell_aper_base_kaddr +
+ mem->bus.offset;
This doesn't make any sense at all. TTM_PL_FLAG_CONTIGUOUS should
probably be completely ignored for doorbells.
Regards,
Christian.
+
+ mem->bus.offset += adev->gmc.doorbell_aper_base;
+ mem->bus.is_iomem = true;
+ break;
default:
return -EINVAL;
}
@@ -1267,6 +1281,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
flags |= AMDGPU_PTE_VALID;
if (mem && (mem->mem_type == TTM_PL_TT ||
+ mem->mem_type == AMDGPU_PL_DOORBELL ||
mem->mem_type == AMDGPU_PL_PREEMPT)) {
flags |= AMDGPU_PTE_SYSTEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 243deb1ffc54..9971665d7d99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -124,7 +124,8 @@ int amdgpu_bar_mgr_alloc_sgt(struct amdgpu_device *adev,
u64 offset, u64 size,
struct device *dev,
enum dma_data_direction dir,
- struct sg_table **sgt);
+ struct sg_table **sgt,
+ u32 mem_type);
void amdgpu_bar_mgr_free_sgt(struct device *dev,
enum dma_data_direction dir,
struct sg_table *sgt);