On Mon, Feb 6, 2023 at 6:30 AM Christian König <christian.koenig@xxxxxxx> wrote: > > Am 03.02.23 um 20:08 schrieb Shashank Sharma: > > From: Alex Deucher <alexander.deucher@xxxxxxx> > > > > This patch adds changes to accommodate the new GEM/TTM domain > > for doorbell memory. > > > > Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> > > Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + > > drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c | 19 ++++++++++------ > > drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 3 ++- > > drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 24 ++++++++++++++++++++- > > drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +- > > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 17 ++++++++++++++- > > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 ++- > > 7 files changed, 58 insertions(+), 11 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > index e3e2e6e3b485..e1c1a360614e 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > @@ -974,6 +974,7 @@ struct amdgpu_device { > > atomic64_t vram_pin_size; > > atomic64_t visible_pin_size; > > atomic64_t gart_pin_size; > > + atomic64_t doorbell_pin_size; > > Please drop that, the amount of pinned doorbells is not needed as far as > I can see. 
> > > > > /* soc15 register offset based on ip, instance and segment */ > > uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c > > index 0656e5bb4f05..43a3137019b1 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c > > @@ -659,15 +659,17 @@ static void amdgpu_bar_mgr_del(struct ttm_resource_manager *man, > > * @dev: the other device > > * @dir: dma direction > > * @sgt: resulting sg table > > + * @mem_type: memory type > > * > > * Allocate and fill a sg table from a VRAM allocation. > > */ > > int amdgpu_bar_mgr_alloc_sgt(struct amdgpu_device *adev, > > - struct ttm_resource *res, > > - u64 offset, u64 length, > > - struct device *dev, > > - enum dma_data_direction dir, > > - struct sg_table **sgt) > > + struct ttm_resource *res, > > + u64 offset, u64 length, > > + struct device *dev, > > + enum dma_data_direction dir, > > + struct sg_table **sgt, > > + u32 mem_type) > > And again that doesn't make any sense at all. > > For now we don't want to export doorbells through DMA-buf. 
> > > { > > struct amdgpu_res_cursor cursor; > > struct scatterlist *sg; > > @@ -701,10 +703,15 @@ int amdgpu_bar_mgr_alloc_sgt(struct amdgpu_device *adev, > > */ > > amdgpu_res_first(res, offset, length, &cursor); > > for_each_sgtable_sg((*sgt), sg, i) { > > - phys_addr_t phys = cursor.start + adev->gmc.vram_aper_base; > > + phys_addr_t phys = cursor.start; > > size_t size = cursor.size; > > dma_addr_t addr; > > > > + if (mem_type == TTM_PL_VRAM) > > + phys += adev->gmc.vram_aper_base; > > + else > > + phys += adev->gmc.doorbell_aper_base; > > + > > addr = dma_map_resource(dev, phys, size, dir, > > DMA_ATTR_SKIP_CPU_SYNC); > > r = dma_mapping_error(dev, addr); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c > > index c48ccde281c3..c645bdc49f34 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c > > @@ -179,9 +179,10 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, > > break; > > > > case TTM_PL_VRAM: > > + case AMDGPU_PL_DOORBELL: > > r = amdgpu_bar_mgr_alloc_sgt(adev, bo->tbo.resource, 0, > > bo->tbo.base.size, attach->dev, > > - dir, &sgt); > > + dir, &sgt, bo->tbo.resource->mem_type); > > if (r) > > return ERR_PTR(r); > > break; > > That stuff can be dropped as well as far as I can see. 
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > > index 887fc53a7d16..b2cfd46c459b 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > > @@ -147,6 +147,18 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) > > c++; > > } > > > > + if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) { > > + places[c].fpfn = 0; > > + places[c].lpfn = 0; > > + places[c].mem_type = AMDGPU_PL_DOORBELL; > > + places[c].flags = 0; > > + places[c].flags |= TTM_PL_FLAG_TOPDOWN; > > + > > + if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) > > + places[c].flags |= TTM_PL_FLAG_CONTIGUOUS; > > + c++; > > + } > > + > > if (domain & AMDGPU_GEM_DOMAIN_GTT) { > > places[c].fpfn = 0; > > places[c].lpfn = 0; > > @@ -464,6 +476,13 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, > > if (man && size < man->size) > > return true; > > goto fail; > > + } else if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) { > > + man = ttm_manager_type(&adev->mman.bdev, AMDGPU_PL_DOORBELL); > > + > > + if (size < man->size) > > + return true; > > + else > > + goto fail; > > Do we ever want userspace to allocate more than one doorbell page at a time? One 4k page would support 512 64-bit doorbells and hence 512 user queues. That seems like a reasonable queue limit. Alex > > > } > > > > /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */ > > @@ -962,8 +981,9 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, > > &adev->visible_pin_size); > > } else if (domain == AMDGPU_GEM_DOMAIN_GTT) { > > atomic64_add(amdgpu_bo_size(bo), &adev->gart_pin_size); > > + } else if (domain == AMDGPU_GEM_DOMAIN_DOORBELL) { > > + atomic64_add(amdgpu_bo_size(bo), &adev->doorbell_pin_size); > > Can be dropped. 
> > > } > > - > > error: > > return r; > > } > > @@ -1013,6 +1033,8 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo) > > &adev->visible_pin_size); > > } else if (bo->tbo.resource->mem_type == TTM_PL_TT) { > > atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size); > > + } else if (bo->tbo.resource->mem_type == AMDGPU_PL_DOORBELL) { > > + atomic64_sub(amdgpu_bo_size(bo), &adev->doorbell_pin_size); > > Ditto. > > > } > > } > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > > index 93207badf83f..082f451d26f4 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > > @@ -326,7 +326,7 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr); > > u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); > > u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo); > > void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem, > > - uint64_t *gtt_mem, uint64_t *cpu_mem); > > + uint64_t *gtt_mem, uint64_t *cpu_mem); > > void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo); > > int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, > > struct dma_fence **fence); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > index bb2230d14ea6..71eff2f195a7 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > @@ -128,6 +128,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, > > case AMDGPU_PL_GDS: > > case AMDGPU_PL_GWS: > > case AMDGPU_PL_OA: > > + case AMDGPU_PL_DOORBELL: > > placement->num_placement = 0; > > placement->num_busy_placement = 0; > > return; > > @@ -500,9 +501,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, > > if (old_mem->mem_type == AMDGPU_PL_GDS || > > old_mem->mem_type == AMDGPU_PL_GWS || > > old_mem->mem_type == AMDGPU_PL_OA || > > + old_mem->mem_type == AMDGPU_PL_DOORBELL || > > 
new_mem->mem_type == AMDGPU_PL_GDS || > > new_mem->mem_type == AMDGPU_PL_GWS || > > - new_mem->mem_type == AMDGPU_PL_OA) { > > + new_mem->mem_type == AMDGPU_PL_OA || > > + new_mem->mem_type == AMDGPU_PL_DOORBELL) { > > /* Nothing to save here */ > > ttm_bo_move_null(bo, new_mem); > > goto out; > > @@ -586,6 +589,17 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, > > mem->bus.offset += adev->gmc.vram_aper_base; > > mem->bus.is_iomem = true; > > break; > > + case AMDGPU_PL_DOORBELL: > > + mem->bus.offset = mem->start << PAGE_SHIFT; > > This won't work if we ever allow allocating more than one page for > a doorbell. > > > + > > + if (adev->mman.doorbell_aper_base_kaddr && > > + mem->placement & TTM_PL_FLAG_CONTIGUOUS) > > + mem->bus.addr = (u8 *)adev->mman.doorbell_aper_base_kaddr + > > + mem->bus.offset; > > This doesn't make any sense at all. TTM_PL_FLAG_CONTIGUOUS should > probably be completely ignored for doorbells. > > Regards, > Christian. > > > + > > + mem->bus.offset += adev->gmc.doorbell_aper_base; > > + mem->bus.is_iomem = true; > > + break; > > default: > > return -EINVAL; > > } > > @@ -1267,6 +1281,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem) > > flags |= AMDGPU_PTE_VALID; > > > > if (mem && (mem->mem_type == TTM_PL_TT || > > + mem->mem_type == AMDGPU_PL_DOORBELL || > > mem->mem_type == AMDGPU_PL_PREEMPT)) { > > flags |= AMDGPU_PTE_SYSTEM; > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > > index 243deb1ffc54..9971665d7d99 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > > @@ -124,7 +124,8 @@ int amdgpu_bar_mgr_alloc_sgt(struct amdgpu_device *adev, > > u64 offset, u64 size, > > struct device *dev, > > enum dma_data_direction dir, > > - struct sg_table **sgt); > > + struct sg_table **sgt, > > + u32 mem_type); > > void amdgpu_bar_mgr_free_sgt(struct device *dev, > > enum 
dma_data_direction dir, > > struct sg_table *sgt); >