Please use git send-email to send patches.
Also don't write 1/1 for a single patch.

Thanks

On Sun, Mar 07, 2021 at 10:28:33PM +0800, Zhu Yanjun wrote:
> From: Zhu Yanjun <zyjzyj2000@xxxxxxxxx>
>
> After the commit ("RDMA/umem: Move to allocate SG table from pages"),
> the sg list from ib_umem_get looks like the following:
> "
> sg_dma_address(sg):0x4b3c1ce000
> sg_dma_address(sg):0x4c3c1cd000
> sg_dma_address(sg):0x4d3c1cc000
> sg_dma_address(sg):0x4e3c1cb000
> "
>
> But sometimes we need an sg list like the following:
> "
> sg_dma_address(sg):0x203b400000
> sg_dma_address(sg):0x213b200000
> sg_dma_address(sg):0x223b000000
> sg_dma_address(sg):0x233ae00000
> sg_dma_address(sg):0x243ac00000
> "
> The function ib_umem_add_sg_table can provide an sg list like the
> second one, but it was removed in the commit ("RDMA/umem: Move to
> allocate SG table from pages"). Add it back.
>
> The new function ib_umem_hugepage_get is based on ib_umem_get and
> calls ib_umem_add_sg_table.
>
> This function ib_umem_hugepage_get can get 4K and 2M sg list dma
> addresses.
>
> Fixes: 0c16d9635e3a ("RDMA/umem: Move to allocate SG table from pages")
> Signed-off-by: Zhu Yanjun <zyjzyj2000@xxxxxxxxx>
> ---
>  drivers/infiniband/core/umem.c | 197 +++++++++++++++++++++++++++++++++
>  include/rdma/ib_umem.h         |   3 +
>  2 files changed, 200 insertions(+)
>
> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
> index 2dde99a9ba07..af8733788b1e 100644
> --- a/drivers/infiniband/core/umem.c
> +++ b/drivers/infiniband/core/umem.c
> @@ -62,6 +62,203 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
>  	sg_free_table(&umem->sg_head);
>  }
>
> +/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
> + *
> + * sg: current scatterlist entry
> + * page_list: array of npage struct page pointers
> + * npages: number of pages in page_list
> + * max_seg_sz: maximum segment size in bytes
> + * nents: [out] number of entries in the scatterlist
> + *
> + * Return new end of scatterlist
> + */
> +static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
> +						struct page **page_list,
> +						unsigned long npages,
> +						unsigned int max_seg_sz,
> +						int *nents)
> +{
> +	unsigned long first_pfn;
> +	unsigned long i = 0;
> +	bool update_cur_sg = false;
> +	bool first = !sg_page(sg);
> +
> +	/* Check if new page_list is contiguous with end of previous page_list.
> +	 * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0.
> +	 */
> +	if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
> +		       page_to_pfn(page_list[0])))
> +		update_cur_sg = true;
> +
> +	while (i != npages) {
> +		unsigned long len;
> +		struct page *first_page = page_list[i];
> +
> +		first_pfn = page_to_pfn(first_page);
> +
> +		/* Compute the number of contiguous pages we have starting
> +		 * at i
> +		 */
> +		for (len = 0; i != npages &&
> +			      first_pfn + len == page_to_pfn(page_list[i]) &&
> +			      len < (max_seg_sz >> PAGE_SHIFT);
> +		     len++)
> +			i++;
> +
> +		/* Squash N contiguous pages from page_list into current sge */
> +		if (update_cur_sg) {
> +			if ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT)) {
> +				sg_set_page(sg, sg_page(sg),
> +					    sg->length + (len << PAGE_SHIFT),
> +					    0);
> +				update_cur_sg = false;
> +				continue;
> +			}
> +			update_cur_sg = false;
> +		}
> +
> +		/* Squash N contiguous pages into next sge or first sge */
> +		if (!first)
> +			sg = sg_next(sg);
> +
> +		(*nents)++;
> +		sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);
> +		first = false;
> +	}
> +
> +	return sg;
> +}
> +
> +/**
> + * ib_umem_hugepage_get - Pin and DMA map userspace memory.
> + *
> + * @device: IB device to connect UMEM
> + * @addr: userspace virtual address to start at
> + * @size: length of region to pin
> + * @access: IB_ACCESS_xxx flags for memory being pinned
> + */
> +struct ib_umem *ib_umem_hugepage_get(struct ib_device *device,
> +				     unsigned long addr,
> +				     size_t size, int access)
> +{
> +	struct ib_umem *umem;
> +	struct page **page_list;
> +	unsigned long lock_limit;
> +	unsigned long new_pinned;
> +	unsigned long cur_base;
> +	unsigned long dma_attr = 0;
> +	struct mm_struct *mm;
> +	unsigned long npages;
> +	int ret;
> +	struct scatterlist *sg;
> +	unsigned int gup_flags = FOLL_WRITE;
> +
> +	/*
> +	 * If the combination of the addr and size requested for this memory
> +	 * region causes an integer overflow, return error.
> +	 */
> +	if (((addr + size) < addr) ||
> +	    PAGE_ALIGN(addr + size) < (addr + size))
> +		return ERR_PTR(-EINVAL);
> +
> +	if (!can_do_mlock())
> +		return ERR_PTR(-EPERM);
> +
> +	if (access & IB_ACCESS_ON_DEMAND)
> +		return ERR_PTR(-EOPNOTSUPP);
> +
> +	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
> +	if (!umem)
> +		return ERR_PTR(-ENOMEM);
> +	umem->ibdev = device;
> +	umem->length = size;
> +	umem->address = addr;
> +	umem->writable = ib_access_writable(access);
> +	umem->owning_mm = mm = current->mm;
> +	mmgrab(mm);
> +
> +	page_list = (struct page **) __get_free_page(GFP_KERNEL);
> +	if (!page_list) {
> +		ret = -ENOMEM;
> +		goto umem_kfree;
> +	}
> +
> +	npages = ib_umem_num_pages(umem);
> +	if (npages == 0 || npages > UINT_MAX) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> +
> +	new_pinned = atomic64_add_return(npages, &mm->pinned_vm);
> +	if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
> +		atomic64_sub(npages, &mm->pinned_vm);
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +
> +	cur_base = addr & PAGE_MASK;
> +
> +	ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
> +	if (ret)
> +		goto vma;
> +
> +	if (!umem->writable)
> +		gup_flags |= FOLL_FORCE;
> +
> +	sg = umem->sg_head.sgl;
> +
> +	while (npages) {
> +		cond_resched();
> +		ret = pin_user_pages_fast(cur_base,
> +					  min_t(unsigned long, npages,
> +						PAGE_SIZE /
> +						sizeof(struct page *)),
> +					  gup_flags | FOLL_LONGTERM, page_list);
> +		if (ret < 0)
> +			goto umem_release;
> +
> +		cur_base += ret * PAGE_SIZE;
> +		npages -= ret;
> +
> +		sg = ib_umem_add_sg_table(sg, page_list, ret,
> +					  dma_get_max_seg_size(device->dma_device),
> +					  &umem->sg_nents);
> +	}
> +
> +	sg_mark_end(sg);
> +
> +	if (access & IB_ACCESS_RELAXED_ORDERING)
> +		dma_attr |= DMA_ATTR_WEAK_ORDERING;
> +
> +	umem->nmap =
> +		ib_dma_map_sg_attrs(device, umem->sg_head.sgl, umem->sg_nents,
> +				    DMA_BIDIRECTIONAL, dma_attr);
> +
> +	if (!umem->nmap) {
> +		ret = -ENOMEM;
> +		goto umem_release;
> +	}
> +
> +	ret = 0;
> +	goto out;
> +
> +umem_release:
> +	__ib_umem_release(device, umem, 0);
> +vma:
> +	atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
> +out:
> +	free_page((unsigned long) page_list);
> +umem_kfree:
> +	if (ret) {
> +		mmdrop(umem->owning_mm);
> +		kfree(umem);
> +	}
> +	return ret ? ERR_PTR(ret) : umem;
> +}
> +EXPORT_SYMBOL(ib_umem_hugepage_get);
> +
>  /**
>   * ib_umem_find_best_pgsz - Find best HW page size to use for this MR
>   *
> diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
> index 676c57f5ca80..fc6350ff21e6 100644
> --- a/include/rdma/ib_umem.h
> +++ b/include/rdma/ib_umem.h
> @@ -96,6 +96,9 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
>  		__rdma_block_iter_next(biter);)
>
>  #ifdef CONFIG_INFINIBAND_USER_MEM
> +struct ib_umem *ib_umem_hugepage_get(struct ib_device *device,
> +				     unsigned long addr,
> +				     size_t size, int access);
>
>  struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
>  			    size_t size, int access);
> --
> 2.25.1
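
For anyone following the merging logic in ib_umem_add_sg_table above, here is a
minimal userspace sketch of the same idea: collapse runs of contiguous page
frame numbers into segments capped at a maximum segment size. It is not part of
the patch; merge_pfns, struct segment, and the sample PFNs are made up for
illustration, and a 4K page size is assumed.

#include <stdio.h>
#include <stddef.h>

#define PAGE_SHIFT 12

/* One merged run of physically contiguous pages. */
struct segment {
	unsigned long first_pfn;	/* first page frame in the run */
	unsigned long length;		/* run length in bytes */
};

/* Merge contiguous PFNs into segments; returns the number of segments. */
static size_t merge_pfns(const unsigned long *pfns, size_t npages,
			 unsigned long max_seg_sz, struct segment *out)
{
	size_t i = 0, nsegs = 0;

	while (i < npages) {
		unsigned long first = pfns[i];
		unsigned long len = 0;

		/* Count how many pages starting at i are contiguous,
		 * without exceeding the segment size cap.
		 */
		while (i < npages && pfns[i] == first + len &&
		       len < (max_seg_sz >> PAGE_SHIFT)) {
			len++;
			i++;
		}

		out[nsegs].first_pfn = first;
		out[nsegs].length = len << PAGE_SHIFT;
		nsegs++;
	}
	return nsegs;
}

int main(void)
{
	/* Two contiguous runs: pages 0x100..0x103 and 0x200..0x201. */
	const unsigned long pfns[] = { 0x100, 0x101, 0x102, 0x103, 0x200, 0x201 };
	struct segment segs[8];
	size_t n = merge_pfns(pfns, 6, 2UL << 20 /* 2 MiB cap */, segs);

	for (size_t k = 0; k < n; k++)
		printf("segment %zu: first pfn 0x%lx, %lu bytes\n",
		       k, segs[k].first_pfn, segs[k].length);
	return 0;
}

With these inputs the sketch prints one 16K segment and one 8K segment, which
is the same collapsing behavior the patch wants for hugepage-backed user memory.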
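
And a hedged sketch of how a driver might call the proposed
ib_umem_hugepage_get(), mirroring how drivers call ib_umem_get() today. The
my_mr structure and my_mr_pin() helper are hypothetical and only illustrate the
ERR_PTR-style return the patch documents.

#include <linux/err.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>

/* Hypothetical driver-side memory region wrapper. */
struct my_mr {
	struct ib_umem *umem;
	unsigned long iova;
};

static int my_mr_pin(struct my_mr *mr, struct ib_device *ibdev,
		     unsigned long start, size_t length, int access)
{
	/* Pins the user range and builds the merged SG list. */
	struct ib_umem *umem = ib_umem_hugepage_get(ibdev, start, length, access);

	if (IS_ERR(umem))
		return PTR_ERR(umem);

	mr->umem = umem;
	mr->iova = start;
	return 0;
}

Teardown would go through ib_umem_release() as with ib_umem_get(), since the
new function reuses the same struct ib_umem.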