The current method that builds an SG table does not allow its users to
request a sub-block of the buffer object. This change modifies the API
signature so that users can specify both the offset and the size of the
request.
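For illustration, a caller that previously had to map the whole buffer
object:

	r = amdgpu_vram_mgr_alloc_sgt(adev, &bo->tbo.mem, attach->dev,
				      dir, &sgt);

can now request a sub-block, e.g. one 4 KiB page starting 8 KiB into
the buffer (the constants are example values; passing 0 for both offset
and size keeps the old whole-buffer behavior):

	r = amdgpu_vram_mgr_alloc_sgt(adev, &bo->tbo.mem, 0x2000, 0x1000,
				      attach->dev, dir, &sgt);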
Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c  |  11 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c      |   8 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h      |   9 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 261 +++++++++++++++-----
 4 files changed, 227 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 2808d5752de1..b23f44999814 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -326,8 +326,8 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
 		break;
 
 	case TTM_PL_VRAM:
-		r = amdgpu_vram_mgr_alloc_sgt(adev, &bo->tbo.mem, attach->dev,
-					      dir, &sgt);
+		r = amdgpu_vram_mgr_alloc_sgt(adev, &bo->tbo.mem, 0, 0,
+					      attach->dev, dir, &sgt);
 		if (r)
 			return ERR_PTR(r);
 		break;
@@ -356,17 +356,12 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
 				 struct sg_table *sgt,
 				 enum dma_data_direction dir)
 {
-	struct dma_buf *dma_buf = attach->dmabuf;
-	struct drm_gem_object *obj = dma_buf->priv;
-	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-
 	if (sgt->sgl->page_link) {
 		dma_unmap_sgtable(attach->dev, sgt, dir, 0);
 		sg_free_table(sgt);
 		kfree(sgt);
 	} else {
-		amdgpu_vram_mgr_free_sgt(adev, attach->dev, dir, sgt);
+		amdgpu_vram_mgr_free_sgt(attach->dev, dir, sgt);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index ce92768cd146..ad504d0e5b26 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -222,8 +222,8 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
  * @offset: The offset that drm_mm_node is used for finding.
  *
  */
-static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_resource *mem,
-					       uint64_t *offset)
+struct drm_mm_node *amdgpu_find_mm_node(struct ttm_resource *mem,
+					uint64_t *offset)
 {
 	struct drm_mm_node *mm_node = mem->mm_node;
 
@@ -782,8 +782,8 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_reso
 	return 0;
 }
 
-static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
-					   unsigned long page_offset)
+unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
+				    unsigned long page_offset)
 {
 	uint64_t offset = (page_offset << PAGE_SHIFT);
 	struct drm_mm_node *mm;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 4df4cf2fd4dd..e3e413dbfd72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -116,11 +116,12 @@ int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man);
 u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
 int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
 			      struct ttm_resource *mem,
+			      uint64_t req_offset,
+			      uint64_t req_size,
 			      struct device *dev,
 			      enum dma_data_direction dir,
 			      struct sg_table **sgt);
-void amdgpu_vram_mgr_free_sgt(struct amdgpu_device *adev,
-			      struct device *dev,
+void amdgpu_vram_mgr_free_sgt(struct device *dev,
 			      enum dma_data_direction dir,
 			      struct sg_table *sgt);
 uint64_t amdgpu_vram_mgr_usage(struct ttm_resource_manager *man);
@@ -155,6 +156,10 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
 int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
 int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
 uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
+struct drm_mm_node *amdgpu_find_mm_node(struct ttm_resource *mem,
+					uint64_t *offset);
+unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
+				    unsigned long page_offset);
 
 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
 int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 21d18efca277..37a57a5ecd85 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -28,6 +28,9 @@
 #include "amdgpu_atomfirmware.h"
 #include "atom.h"
 
+/* Maximum size in bytes of a single scatterlist (SG) node: 2 GiB */
+static const uint64_t VRAM_MAX_SG_NODE_SIZE = 0x80000000;
+
 static inline struct amdgpu_vram_mgr *to_vram_mgr(struct ttm_resource_manager *man)
 {
 	return container_of(man, struct amdgpu_vram_mgr, manager);
@@ -565,6 +568,95 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 	mem->mm_node = NULL;
 }
 
+/**
+ * amdgpu_vram_mgr_get_size_sgt - Determine the number of scatterlist (SG)
+ * nodes that are needed to encapsulate @req_size bytes of memory.
+ *
+ * @mm_node: handle of the first memory node to walk down
+ * @req_size: number of bytes of memory requested
+ * @page_offset: offset of the desired memory in page-index terms
+ * @byte_offset: offset of the desired memory when it is not on a page boundary
+ *
+ * Returns the number of scatterlist nodes needed to build the sg_table.
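+ *
+ * Example (illustrative, assuming 4 KiB pages and no starting offset):
+ * a 5 GiB request backed by two mm_nodes of 4 GiB and 1 GiB needs
+ * ceil(4 GiB / 2 GiB) + ceil(1 GiB / 2 GiB) = 2 + 1 = 3 SG nodes, since
+ * no single SG node may exceed VRAM_MAX_SG_NODE_SIZE (2 GiB).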
+ */
+static uint32_t amdgpu_vram_mgr_get_size_sgt(struct drm_mm_node *mm_node,
+		uint64_t req_size, uint64_t page_offset, uint32_t byte_offset)
+{
+	uint32_t num_sg_nodes = 0;
+
+	/* Walk down the memory node list to determine the number of SG nodes */
+	while (req_size > 0) {
+		uint64_t node_size, node_offset, min_size, tmp_cnt;
+
+		/* Determine the available memory for the current memory node */
+		node_offset = page_offset << PAGE_SHIFT;
+		node_offset = node_offset + byte_offset;
+		node_size = mm_node->size << PAGE_SHIFT;
+		node_size = node_size - node_offset;
+
+		/* Offsets apply only to the first memory node */
+		byte_offset = 0;
+		page_offset = 0;
+
+		/* Determine the number of SG nodes for the current memory node */
+		min_size = min(req_size, node_size);
+		tmp_cnt = (min_size + (VRAM_MAX_SG_NODE_SIZE - 1)) /
+				VRAM_MAX_SG_NODE_SIZE;
+		num_sg_nodes = num_sg_nodes + tmp_cnt;
+		req_size = req_size - min_size;
+
+		/* Get the handle of the next memory node */
+		mm_node++;
+	}
+
+	/* Number of SG nodes in the SG table */
+	return num_sg_nodes;
+}
+
+static struct scatterlist *amdgpu_vram_mgr_populate_nodes_sg(uint64_t size,
+		uint32_t iter, uint64_t pfn, uint32_t offset,
+		struct device *dev, enum dma_data_direction dir,
+		struct scatterlist *sg_node)
+{
+	uint64_t node_addr, sg_size;
+	dma_addr_t dma_addr;
+	int32_t idx, ret;
+
+	for (idx = 0; idx < iter; idx++) {
+
+		/* Get the bus address from the page frame number */
+		node_addr = pfn << PAGE_SHIFT;
+		node_addr = node_addr + (idx * VRAM_MAX_SG_NODE_SIZE);
+
+		/* Determine the size of the memory scatter node */
+		sg_size = min_t(uint64_t, size, VRAM_MAX_SG_NODE_SIZE);
+		size = size - sg_size;
+
+		dma_addr = dma_map_resource(dev, (phys_addr_t)node_addr,
+				sg_size, dir, DMA_ATTR_SKIP_CPU_SYNC);
+		ret = dma_mapping_error(dev, dma_addr);
+		if (ret)
+			return ERR_PTR(ret);
+
+		/* Populate the scatter node and get the handle of the next node */
+		sg_set_page(sg_node, NULL, sg_size, offset);
+		sg_dma_address(sg_node) = dma_addr;
+		sg_dma_len(sg_node) = sg_size;
+		sg_node = sg_next(sg_node);
+
+		/* The offset applies only to the first node */
+		offset = 0;
+	}
+
+	/* A NULL return here simply means the end of the table was reached */
+	return sg_node;
+}
+
 /**
  * amdgpu_vram_mgr_alloc_sgt - allocate and fill a sg table
  *
@@ -572,70 +664,143 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
  * @mem: TTM memory object
- * @dev: the other device
+ * @req_offset: offset in bytes into the buffer object
+ * @req_size: number of bytes to export; zero means the whole buffer
+ * @dma_dev: device that will access the exported memory
  * @dir: dma direction
- * @sgt: resulting sg table
+ * @ret_sgt: resulting sg table
  *
  * Allocate and fill a sg table from a VRAM allocation.
  */
 int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
 			      struct ttm_resource *mem,
-			      struct device *dev,
+			      uint64_t req_offset,
+			      uint64_t req_size,
+			      struct device *dma_dev,
 			      enum dma_data_direction dir,
-			      struct sg_table **sgt)
+			      struct sg_table **ret_sgt)
 {
-	struct drm_mm_node *node;
-	struct scatterlist *sg;
-	int num_entries = 0;
-	unsigned int pages;
-	int i, r;
-
-	*sgt = kmalloc(sizeof(**sgt), GFP_KERNEL);
-	if (!*sgt)
-		return -ENOMEM;
-
-	for (pages = mem->num_pages, node = mem->mm_node;
-	     pages; pages -= node->size, ++node)
-		++num_entries;
-
-	r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
-	if (r)
-		goto error_free;
-
-	for_each_sgtable_sg((*sgt), sg, i)
-		sg->length = 0;
-
-	node = mem->mm_node;
-	for_each_sgtable_sg((*sgt), sg, i) {
-		phys_addr_t phys = (node->start << PAGE_SHIFT) +
-			adev->gmc.aper_base;
-		size_t size = node->size << PAGE_SHIFT;
-		dma_addr_t addr;
-
-		++node;
-		addr = dma_map_resource(dev, phys, size, dir,
-					DMA_ATTR_SKIP_CPU_SYNC);
-		r = dma_mapping_error(dev, addr);
-		if (r)
+	uint64_t node_page_offset, byte_offset, page_offset;
+	uint64_t num_sg_nodes, base_pfn, work_size;
+	struct drm_mm_node *node, *start_node;
+	struct scatterlist *sg_node;
+	struct sg_table *sg_tbl;
+	int32_t idx, ret;
+
+	/*
+	 * Determine the first mm_node to use in computing the MMIO address.
+	 * This is determined by the offset of the request, which can be at a
+	 * page or a non-page boundary. Furthermore, this offset may not
+	 * coincide with the start of an mm_node, i.e. it may lie inside one.
+	 * The offset of the request should therefore be treated as follows:
+	 *
+	 *   offset = (N * PAGE_SIZE) + OFFSET_IN_PAGE
+	 *   N can be zero or higher
+	 *   OFFSET_IN_PAGE can be anywhere from zero to (PAGE_SIZE - 1)
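+	 *
+	 *   Example (illustrative): with 4 KiB pages, a req_offset of 0x2100
+	 *   decomposes into N = 2 whole pages and OFFSET_IN_PAGE = 0x100,
+	 *   i.e. 8448 = (2 * 4096) + 256.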
+	 *   mm_node->start refers to K pages off from the MMIO base address
+	 *   mm_node->size refers to the number of pages the mm_node encapsulates
+	 *
+	 * @note: The starting page of a request may lie one or more pages
+	 * past the start of its mm_node.
+	 */
+	uint64_t req_page_idx = req_offset / (_AC(1, UL) << PAGE_SHIFT);
+	uint64_t req_byte_offset = req_page_idx << PAGE_SHIFT;
+	uint32_t offset_in_page = req_offset & ((_AC(1, UL) << PAGE_SHIFT) - 1);
+
+	/* A request size of zero means to export the entire buffer object */
+	if (req_size == 0)
+		req_size = mem->num_pages << PAGE_SHIFT;
+
+	start_node = amdgpu_find_mm_node(mem, &req_byte_offset);
+	node_page_offset = req_byte_offset >> PAGE_SHIFT;
+
+	/*
+	 * Determine the number of scatter gather (SG) nodes that are needed
+	 * to export the requested size of memory. Depending upon the request,
+	 * either of the following is possible when building the sg_table:
+	 *   the starting mm_node contributes all of the pages
+	 *   the starting mm_node does not have all of the pages
+	 */
+	num_sg_nodes = amdgpu_vram_mgr_get_size_sgt(start_node, req_size,
+			node_page_offset, offset_in_page);
+
+	/* Allocate an sg_table to carry the list of scatter gather (SG) nodes */
+	sg_tbl = kmalloc(sizeof(*sg_tbl), GFP_KERNEL);
+	if (!sg_tbl) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	ret = sg_alloc_table(sg_tbl, num_sg_nodes, GFP_KERNEL);
+	if (unlikely(ret))
+		goto out;
+	for_each_sgtable_sg(sg_tbl, sg_node, idx)
+		sg_node->length = 0;
+
+	/* Determine the base page frame number (PFN) of the MMIO space */
+	base_pfn = adev->gmc.aper_base >> PAGE_SHIFT;
+
+	/* Populate the nodes of the scatterlist table */
+	work_size = req_size;
+	sg_node = sg_tbl->sgl;
+	node = start_node;
+	byte_offset = offset_in_page;
+	page_offset = node_page_offset;
+	while (work_size > 0) {
+		uint32_t iter;
+		uint64_t elem_pfn, node_size, node_offset, min_size;
+
+		/* Adjust the PFN to correspond to the request */
+		elem_pfn = base_pfn + node->start + page_offset;
+
+		/* Determine the size of the available memory after adjustment */
+		node_size = node->size << PAGE_SHIFT;
+		node_offset = page_offset << PAGE_SHIFT;
+		node_offset = node_offset + byte_offset;
+		node_size = node_size - node_offset;
+
+		/* Distribute the memory of the mm_node into one or more SG nodes */
+		min_size = min_t(uint64_t, work_size, node_size);
+		iter = (min_size + (VRAM_MAX_SG_NODE_SIZE - 1)) /
+				VRAM_MAX_SG_NODE_SIZE;
+		sg_node = amdgpu_vram_mgr_populate_nodes_sg(min_size, iter,
+				elem_pfn, byte_offset, dma_dev, dir, sg_node);
+
+		/* Check whether populating the SG nodes failed */
+		if (IS_ERR(sg_node)) {
+			ret = PTR_ERR(sg_node);
 			goto error_unmap;
+		}
+
+		/* Update the size of the request left to handle */
+		work_size = work_size - min_size;
 
-		sg_set_page(sg, NULL, size, 0);
-		sg_dma_address(sg) = addr;
-		sg_dma_len(sg) = size;
+		/* Offsets apply only to the first SG node */
+		page_offset = 0;
+		byte_offset = 0;
+
+		/* Get the handle of the next memory node */
+		node++;
 	}
+
+	*ret_sgt = sg_tbl;
 	return 0;
 
 error_unmap:
-	for_each_sgtable_sg((*sgt), sg, i) {
-		if (!sg->length)
+	for_each_sgtable_sg(sg_tbl, sg_node, idx) {
+		if (!sg_node->length)
 			continue;
 
-		dma_unmap_resource(dev, sg->dma_address,
-				   sg->length, dir,
+		dma_unmap_resource(dma_dev, sg_node->dma_address,
+				   sg_node->length, dir,
 				   DMA_ATTR_SKIP_CPU_SYNC);
 	}
-	sg_free_table(*sgt);
+	sg_free_table(sg_tbl);
 
-error_free:
-	kfree(*sgt);
-	return r;
+out:
+	kfree(sg_tbl);
+	*ret_sgt = NULL;
+	return ret;
 }
 
 /**
@@ -649,8 +814,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
  *
  * Free a previously allocate sg table.
  */
-void amdgpu_vram_mgr_free_sgt(struct amdgpu_device *adev,
-			      struct device *dev,
+void amdgpu_vram_mgr_free_sgt(struct device *dev,
 			      enum dma_data_direction dir,
 			      struct sg_table *sgt)
 {
-- 
2.29.2
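For completeness, a hypothetical peer-to-peer importer would pair the
two calls roughly as follows (a sketch only; peer_dev, sub_offset and
sub_size are illustrative names, and amdgpu_vram_mgr_free_sgt both
unmaps the entries and frees the table):

	struct sg_table *sgt;
	int r;

	r = amdgpu_vram_mgr_alloc_sgt(adev, &bo->tbo.mem, sub_offset,
				      sub_size, peer_dev, DMA_BIDIRECTIONAL,
				      &sgt);
	if (r)
		return r;

	/* ... the peer device DMAs to/from the requested sub-block ... */

	amdgpu_vram_mgr_free_sgt(peer_dev, DMA_BIDIRECTIONAL, sgt);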