Hi Tomasz,

Thanks for the patch.

On Thursday 22 March 2012 11:02:23 Tomasz Stanislawski wrote:
> From: Tomasz Stanislawski <t.stanislaws@xxxxxxxxxxx>
> 
> This patch combines updates and fixes to the dma-contig allocator.
> Moreover, the allocator code was refactored.
> The most important changes are:
> - functions were reordered
> - move compression of the scatterlist to a separate function
> - add support for multi-chunk but contiguous scatterlists
> - simplified implementation of the vb2-dma-contig context structure
> - let the mmap method use dma_mmap_writecombine
> - add support for scatterlists in userptr mode
> 
> Signed-off-by: Marek Szyprowski <m.szyprowski@xxxxxxxxxxx>
> 	[mmap method]
> Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@xxxxxxxxxxx>
> 	[scatterlist in userptr mode]
> Signed-off-by: Kamil Debski <k.debski@xxxxxxxxxxx>
> 	[bugfixing]
> Signed-off-by: Tomasz Stanislawski <t.stanislaws@xxxxxxxxxxx>
> 	[core refactoring, helper functions]
> Signed-off-by: Kyungmin Park <kyungmin.park@xxxxxxxxxxx>
> ---
>  drivers/media/video/videobuf2-dma-contig.c |  400 +++++++++++++++++++++++++---
>  1 files changed, 365 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/media/video/videobuf2-dma-contig.c b/drivers/media/video/videobuf2-dma-contig.c
> index c898e6f..9965465 100644
> --- a/drivers/media/video/videobuf2-dma-contig.c
> +++ b/drivers/media/video/videobuf2-dma-contig.c
> @@ -10,9 +10,12 @@
>   * the Free Software Foundation.
>   */
> 
> +#include <linux/dma-buf.h>
> +#include <linux/dma-mapping.h>
>  #include <linux/module.h>
> +#include <linux/scatterlist.h>
> +#include <linux/sched.h>
>  #include <linux/slab.h>
> -#include <linux/dma-mapping.h>
> 
>  #include <media/videobuf2-core.h>
>  #include <media/videobuf2-memops.h>
> @@ -22,16 +25,115 @@ struct vb2_dc_buf {
> 	void *vaddr;
> 	unsigned long size;
> 	dma_addr_t dma_addr;
> +	struct sg_table *dma_sgt;
> +	enum dma_data_direction dma_dir;
> 
> 	/* MMAP related */
> 	struct vb2_vmarea_handler handler;
> 	atomic_t refcount;
> +	struct sg_table *sgt_base;
> 
> 	/* USERPTR related */
> 	struct vm_area_struct *vma;
> };
> 
>  /*********************************************/
> +/*        scatterlist table functions        */
> +/*********************************************/
> +
> +static struct sg_table *vb2_dc_pages_to_sgt(struct page **pages,
> +	unsigned long n_pages, size_t offset, size_t offset2)
> +{
> +	struct sg_table *sgt;
> +	int i, j; /* loop counters */
> +	int cur_page, chunks;
> +	int ret;
> +	struct scatterlist *s;
> +
> +	sgt = kzalloc(sizeof *sgt, GFP_KERNEL);
> +	if (!sgt)
> +		return ERR_PTR(-ENOMEM);
> +
> +	/* compute number of chunks */
> +	chunks = 1;
> +	for (i = 1; i < n_pages; ++i)
> +		if (pages[i] != pages[i - 1] + 1)
> +			++chunks;
> +
> +	ret = sg_alloc_table(sgt, chunks, GFP_KERNEL);
> +	if (ret) {
> +		kfree(sgt);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	/* merging chunks and putting them into the scatterlist */
> +	cur_page = 0;
> +	for_each_sg(sgt->sgl, s, sgt->orig_nents, i) {
> +		size_t size = PAGE_SIZE;
> +
> +		for (j = cur_page + 1; j < n_pages; ++j) {
> +			if (pages[j] != pages[j - 1] + 1)
> +				break;
> +			size += PAGE_SIZE;
> +		}
> +
> +		/* cut offset if chunk starts at the first page */
> +		if (cur_page == 0)
> +			size -= offset;
> +		/* cut offset2 if chunk ends at the last page */
> +		if (j == n_pages)
> +			size -= offset2;
> +
> +		sg_set_page(s, pages[cur_page], size, offset);
> +		offset = 0;
> +		cur_page = j;
> +	}
> +
> +	return sgt;
> +}
> +
> +static void vb2_dc_release_sgtable(struct sg_table *sgt)
> +{
> +	sg_free_table(sgt);
> +	kfree(sgt);
> +}
> +
> +static void vb2_dc_put_sgtable(struct sg_table *sgt, int dirty)
> +{
> +	struct scatterlist *s;
> +	int i, j;
> +
> +	for_each_sg(sgt->sgl, s, sgt->nents, i) {
> +		struct page *page = sg_page(s);
> +		int n_pages = PAGE_ALIGN(s->offset + s->length) >> PAGE_SHIFT;
> +
> +		for (j = 0; j < n_pages; ++j, ++page) {
> +			if (dirty)
> +				set_page_dirty_lock(page);
> +			put_page(page);
> +		}
> +	}
> +
> +	vb2_dc_release_sgtable(sgt);
> +}
> +
> +static unsigned long vb2_dc_get_contiguous_size(struct sg_table *sgt)
> +{
> +	struct scatterlist *s;
> +	dma_addr_t expected = sg_dma_address(sgt->sgl);
> +	int i;
> +	unsigned long size = 0;
> +
> +	for_each_sg(sgt->sgl, s, sgt->nents, i) {
> +		if (sg_dma_address(s) != expected)
> +			break;
> +		expected = sg_dma_address(s) + sg_dma_len(s);
> +		size += sg_dma_len(s);
> +	}
> +	return size;
> +}
> +
> +/*********************************************/
>  /*         callbacks for all buffers         */
>  /*********************************************/
> 
> @@ -45,8 +147,6 @@ static void *vb2_dc_cookie(void *buf_priv)
>  static void *vb2_dc_vaddr(void *buf_priv)
>  {
> 	struct vb2_dc_buf *buf = buf_priv;
> -	if (!buf)
> -		return 0;
> 
> 	return buf->vaddr;
>  }
> @@ -58,6 +158,28 @@ static unsigned int vb2_dc_num_users(void *buf_priv)
> 	return atomic_read(&buf->refcount);
>  }
> 
> +static void vb2_dc_prepare(void *buf_priv)
> +{
> +	struct vb2_dc_buf *buf = buf_priv;
> +	struct sg_table *sgt = buf->dma_sgt;
> +
> +	if (!sgt)
> +		return;
> +
> +	dma_sync_sg_for_device(buf->dev, sgt->sgl, sgt->nents, buf->dma_dir);
> +}
> +
> +static void vb2_dc_finish(void *buf_priv)
> +{
> +	struct vb2_dc_buf *buf = buf_priv;
> +	struct sg_table *sgt = buf->dma_sgt;
> +
> +	if (!sgt)
> +		return;
> +
> +	dma_sync_sg_for_cpu(buf->dev, sgt->sgl, sgt->nents, buf->dma_dir);
> +}
> +
>  /*********************************************/
>  /*        callbacks for MMAP buffers         */
>  /*********************************************/
> 
> @@ -66,31 +188,70 @@ static void vb2_dc_put(void *buf_priv)
>  {
> 	struct vb2_dc_buf *buf = buf_priv;
> 
> -	if (atomic_dec_and_test(&buf->refcount)) {
> -		dma_free_coherent(buf->dev, buf->size, buf->vaddr,
> -			buf->dma_addr);
> -		kfree(buf);
> -	}
> +	if (!atomic_dec_and_test(&buf->refcount))
> +		return;
> +
> +	vb2_dc_release_sgtable(buf->sgt_base);
> +	dma_free_coherent(buf->dev, buf->size, buf->vaddr,
> +		buf->dma_addr);
> +	kfree(buf);
>  }
> 
>  static void *vb2_dc_alloc(void *alloc_ctx, unsigned long size)
>  {
> 	struct device *dev = alloc_ctx;
> 	struct vb2_dc_buf *buf;
> +	int ret;
> +	int n_pages;
> +	struct page **pages = NULL;
> 
> 	buf = kzalloc(sizeof *buf, GFP_KERNEL);
> 	if (!buf)
> 		return ERR_PTR(-ENOMEM);
> 
> -	buf->vaddr = dma_alloc_coherent(dev, size, &buf->dma_addr, GFP_KERNEL);
> +	buf->dev = dev;
> +	buf->size = size;
> +	buf->vaddr = dma_alloc_coherent(buf->dev, buf->size, &buf->dma_addr,
> +		GFP_KERNEL);
> +
> +	ret = -ENOMEM;
> 	if (!buf->vaddr) {
> -		dev_err(dev, "dma_alloc_coherent of size %ld failed\n", size);
> -		kfree(buf);
> -		return ERR_PTR(-ENOMEM);
> +		dev_err(dev, "dma_alloc_coherent of size %ld failed\n",
> +			size);
> +		goto fail_buf;
> 	}
> 
> -	buf->dev = dev;
> -	buf->size = size;
> +	WARN_ON((unsigned long)buf->vaddr & ~PAGE_MASK);
> +	WARN_ON(buf->dma_addr & ~PAGE_MASK);
> +
> +	n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
> +
> +	pages = kmalloc(n_pages * sizeof pages[0], GFP_KERNEL);
> +	if (!pages) {
> +		printk(KERN_ERR "failed to alloc page table\n");
> +		goto fail_dma;
> +	}
> +
> +	ret = dma_get_pages(dev, buf->vaddr, buf->dma_addr, pages,
> +		n_pages);

As the only purpose of this is to retrieve a list of pages that will be
used to create a single-entry sgt, wouldn't it be possible to shortcut the
code and get the physical address of the buffer directly?
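Something along these lines, perhaps (an untested sketch, not code from
this patch; vb2_dc_sgt_from_coherent() is a hypothetical helper, and it
assumes the DMA address equals the physical address, i.e. no IOMMU and no
DMA offset, which the DMA API doesn't guarantee):

	/*
	 * Hypothetical shortcut: build a single-entry sg table straight
	 * from the coherent buffer, without retrieving a pages array first.
	 */
	static struct sg_table *vb2_dc_sgt_from_coherent(dma_addr_t dma_addr,
		size_t size)
	{
		struct sg_table *sgt;
		int ret;

		sgt = kzalloc(sizeof *sgt, GFP_KERNEL);
		if (!sgt)
			return ERR_PTR(-ENOMEM);

		ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
		if (ret) {
			kfree(sgt);
			return ERR_PTR(ret);
		}

		/* assumption: dma_addr is also the physical address */
		sg_set_page(sgt->sgl, pfn_to_page(PFN_DOWN(dma_addr)),
			PAGE_ALIGN(size), 0);

		return sgt;
	}

That would make both the pages array and the chunk merging unnecessary for
MMAP buffers.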
> +	if (ret < 0) {
> +		printk(KERN_ERR "failed to get buffer pages from DMA API\n");
> +		goto fail_pages;
> +	}
> +	if (ret != n_pages) {
> +		ret = -EFAULT;
> +		printk(KERN_ERR "failed to get all pages from DMA API\n");
> +		goto fail_pages;
> +	}
> +
> +	buf->sgt_base = vb2_dc_pages_to_sgt(pages, n_pages, 0, 0);
> +	if (IS_ERR(buf->sgt_base)) {
> +		ret = PTR_ERR(buf->sgt_base);
> +		printk(KERN_ERR "failed to prepare sg table\n");
> +		goto fail_pages;
> +	}

buf->sgt_base isn't used in this patch. I would thus move the buf->sgt_base
creation code to the patch that uses it, or to its own patch just before
that one.

> +
> +	/* pages are no longer needed */
> +	kfree(pages);
> 
> 	buf->handler.refcount = &buf->refcount;
> 	buf->handler.put = vb2_dc_put;
> @@ -99,59 +260,226 @@ static void *vb2_dc_alloc(void *alloc_ctx, unsigned long size)
> 	atomic_inc(&buf->refcount);
> 
> 	return buf;
> +
> +fail_pages:
> +	kfree(pages);
> +
> +fail_dma:
> +	dma_free_coherent(buf->dev, buf->size, buf->vaddr, buf->dma_addr);
> +
> +fail_buf:
> +	kfree(buf);
> +
> +	return ERR_PTR(ret);
>  }
> 
>  static int vb2_dc_mmap(void *buf_priv, struct vm_area_struct *vma)
>  {
> 	struct vb2_dc_buf *buf = buf_priv;
> +	int ret;
> +
> +	/*
> +	 * dma_mmap_* uses vm_pgoff as an in-buffer offset, but we want to
> +	 * map the whole buffer
> +	 */
> +	vma->vm_pgoff = 0;
> +
> +	ret = dma_mmap_writecombine(buf->dev, vma, buf->vaddr,
> +		buf->dma_addr, buf->size);
> 
> -	if (!buf) {
> -		printk(KERN_ERR "No buffer to map\n");
> -		return -EINVAL;
> +	if (ret) {
> +		printk(KERN_ERR "Remapping memory failed, error: %d\n", ret);
> +		return ret;
> 	}
> 
> -	return vb2_mmap_pfn_range(vma, buf->dma_addr, buf->size,
> -		&vb2_common_vm_ops, &buf->handler);
> +	vma->vm_flags |= VM_DONTEXPAND | VM_RESERVED;
> +	vma->vm_private_data = &buf->handler;
> +	vma->vm_ops = &vb2_common_vm_ops;
> +
> +	vma->vm_ops->open(vma);
> +
> +	printk(KERN_DEBUG "%s: mapped dma addr 0x%08lx at 0x%08lx, size %ld\n",
> +		__func__, (unsigned long)buf->dma_addr, vma->vm_start,
> +		buf->size);
> +
> +	return 0;
>  }
> 
>  /*********************************************/
>  /*       callbacks for USERPTR buffers       */
>  /*********************************************/
> 
> +static inline int vma_is_io(struct vm_area_struct *vma)
> +{
> +	return !!(vma->vm_flags & (VM_IO | VM_PFNMAP));

Isn't VM_PFNMAP enough? Wouldn't it be possible (at least in theory) to get
a discontinuous physical range with VM_IO alone?

> +}
> +
> +static int vb2_dc_get_pages(unsigned long start, struct page **pages,
> +	int n_pages, struct vm_area_struct **copy_vma, int write)
> +{
> +	struct vm_area_struct *vma;
> +	int n = 0; /* number of pages grabbed so far */
> +	int ret = -EFAULT;
> +
> +	/* entering critical section for mm access */
> +	down_read(&current->mm->mmap_sem);

This will generate AB-BA deadlock warnings if lockdep is enabled. This
function is called with the queue lock held, and the mmap() handler, which
takes the queue lock, is called with current->mm->mmap_sem held. This is a
known issue with videobuf2, not specific to this patch. The warning is
usually a false positive (which we still need to fix, as it worries users),
but it can become a real issue if an MMAP queue and a USERPTR queue are
created by a driver with the same queue lock.

> +	vma = find_vma(current->mm, start);
> +	if (!vma) {
> +		printk(KERN_ERR "no vma for address %lu\n", start);
> +		goto cleanup;
> +	}
> +
> +	if (vma_is_io(vma)) {
> +		unsigned long pfn;
> +
> +		if (vma->vm_end - start < n_pages * PAGE_SIZE) {
> +			printk(KERN_ERR "vma is too small\n");
> +			goto cleanup;
> +		}
> +
> +		for (n = 0; n < n_pages; ++n, start += PAGE_SIZE) {
> +			ret = follow_pfn(vma, start, &pfn);
> +			if (ret) {
> +				printk(KERN_ERR "no page for address %lu\n",
> +					start);
> +				goto cleanup;
> +			}
> +			pages[n] = pfn_to_page(pfn);
> +			get_page(pages[n]);

This worries me. When the VM_PFNMAP flag is set, the memory pages are not
backed by a struct page. Creating a struct page pointer out of it can be an
acceptable hack (for instance to store a page in a scatterlist with
sg_set_page() and then retrieve its physical address with sg_phys()), but
you should not expect the struct page to be valid for anything else.
Calling get_page() on it will likely crash.

> +		}
> +	} else {
> +		n = get_user_pages(current, current->mm, start & PAGE_MASK,
> +			n_pages, write, 1, pages, NULL);
> +		if (n != n_pages) {
> +			printk(KERN_ERR "got only %d of %d user pages\n",
> +				n, n_pages);
> +			goto cleanup;
> +		}
> +	}
> +
> +	*copy_vma = vb2_get_vma(vma);
> +	if (!*copy_vma) {
> +		printk(KERN_ERR "failed to copy vma\n");
> +		ret = -ENOMEM;
> +		goto cleanup;
> +	}

Do we really need to make a copy of the VMA? The only reason why we store a
pointer to it is to check the flags in vb2_dc_put_userptr(). We could store
the flags instead and avoid the vb2_get_vma()/vb2_put_vma() calls
altogether.
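For instance (a rough sketch of what I mean; io_mapped is a hypothetical
field added to struct vb2_dc_buf, it doesn't exist in this patch):

	/* in struct vb2_dc_buf, replacing the vma pointer */
	int io_mapped;	/* hypothetical: cached vma_is_io() result */

	/* in vb2_dc_get_pages() or its caller, while mmap_sem is held */
	buf->io_mapped = vma_is_io(vma);

	/* vb2_dc_put_userptr() then doesn't need the vma at all */
	static void vb2_dc_put_userptr(void *buf_priv)
	{
		struct vb2_dc_buf *buf = buf_priv;
		struct sg_table *sgt = buf->dma_sgt;

		dma_unmap_sg(buf->dev, sgt->sgl, sgt->orig_nents, buf->dma_dir);
		vb2_dc_put_sgtable(sgt, !buf->io_mapped);
		kfree(buf);
	}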
> +
> +	/* leaving critical section for mm access */
> +	up_read(&current->mm->mmap_sem);
> +
> +	return 0;
> +
> +cleanup:
> +	up_read(&current->mm->mmap_sem);
> +
> +	/* putting user pages if used, can be done without the lock */
> +	while (n)
> +		put_page(pages[--n]);
> +
> +	return ret;
> +}
> +
>  static void *vb2_dc_get_userptr(void *alloc_ctx, unsigned long vaddr,
> -	unsigned long size, int write)
> +	unsigned long size, int write)
>  {
> 	struct vb2_dc_buf *buf;
> -	struct vm_area_struct *vma;
> -	dma_addr_t dma_addr = 0;
> -	int ret;
> +	unsigned long start, end, offset, offset2;
> +	struct page **pages;
> +	int n_pages;
> +	int ret = 0;
> +	struct sg_table *sgt;
> +	unsigned long contig_size;
> 
> 	buf = kzalloc(sizeof *buf, GFP_KERNEL);
> 	if (!buf)
> 		return ERR_PTR(-ENOMEM);
> 
> -	ret = vb2_get_contig_userptr(vaddr, size, &vma, &dma_addr);
> +	buf->dev = alloc_ctx;
> +	buf->dma_dir = write ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
> +
> +	start = (unsigned long)vaddr & PAGE_MASK;
> +	offset = (unsigned long)vaddr & ~PAGE_MASK;
> +	end = PAGE_ALIGN((unsigned long)vaddr + size);
> +	offset2 = end - (unsigned long)vaddr - size;
> +	n_pages = (end - start) >> PAGE_SHIFT;
> +
> +	pages = kmalloc(n_pages * sizeof pages[0], GFP_KERNEL);
> +	if (!pages) {
> +		ret = -ENOMEM;
> +		printk(KERN_ERR "failed to allocate pages table\n");
> +		goto fail_buf;
> +	}
> +
> +	/* extract page list from userspace mapping */
> +	ret = vb2_dc_get_pages(start, pages, n_pages, &buf->vma, write);
> 	if (ret) {
> -		printk(KERN_ERR "Failed acquiring VMA for vaddr 0x%08lx\n",
> -			vaddr);
> -		kfree(buf);
> -		return ERR_PTR(ret);
> +		printk(KERN_ERR "failed to get user pages\n");
> +		goto fail_pages;
> +	}
> +
> +	sgt = vb2_dc_pages_to_sgt(pages, n_pages, offset, offset2);
> +	if (!sgt) {
> +		printk(KERN_ERR "failed to create scatterlist table\n");
> +		ret = -ENOMEM;
> +		goto fail_get_pages;
> 	}

This looks overly complex to me. You create a multi-chunk sgt out of the
user pointer address and map it completely, and then check whether it
starts with a big enough contiguous chunk. Why don't you create an sgt with
a single contiguous chunk instead? In the VM_PFNMAP case you could check
whether the area is contiguous when you follow the PFNs, stop at the first
discontinuity, and create an sgt with a single element right there. You
would then need to call vb2_dc_pages_to_sgt() in the normal case only, and
stop at the first discontinuity there as well.
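The VM_PFNMAP side could then look roughly like this (an untested sketch;
vb2_dc_pfnmap_contig_pages() is a hypothetical helper):

	/*
	 * Hypothetical helper: follow the PFNs of a VM_PFNMAP area and
	 * return the number of physically contiguous pages starting at
	 * 'start', stopping at the first discontinuity. The first PFN is
	 * returned through 'pfn0'. Must be called with mmap_sem held.
	 */
	static int vb2_dc_pfnmap_contig_pages(struct vm_area_struct *vma,
		unsigned long start, int n_pages, unsigned long *pfn0)
	{
		unsigned long pfn, prev_pfn;
		int i, ret;

		ret = follow_pfn(vma, start, &prev_pfn);
		if (ret)
			return ret;
		*pfn0 = prev_pfn;

		for (i = 1; i < n_pages; ++i) {
			ret = follow_pfn(vma, start + i * PAGE_SIZE, &pfn);
			if (ret || pfn != prev_pfn + 1)
				break;	/* error or first discontinuity */
			prev_pfn = pfn;
		}

		return i;	/* number of contiguous pages found */
	}

The caller would fail if the returned count is smaller than n_pages, and
otherwise build a single-element sgt with sg_alloc_table(sgt, 1,
GFP_KERNEL) and sg_set_page() on pfn_to_page(*pfn0) (with the struct page
caveat mentioned above).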
> 
> +	/* pages are no longer needed */
> +	kfree(pages);
> +	pages = NULL;
> +
> +	sgt->nents = dma_map_sg(buf->dev, sgt->sgl, sgt->orig_nents,
> +		buf->dma_dir);
> +	if (sgt->nents <= 0) {
> +		printk(KERN_ERR "failed to map scatterlist\n");
> +		ret = -EIO;
> +		goto fail_sgt;
> +	}
> +
> +	contig_size = vb2_dc_get_contiguous_size(sgt);
> +	if (contig_size < size) {
> +		printk(KERN_ERR "contiguous mapping is too small %lu/%lu\n",
> +			contig_size, size);
> +		ret = -EFAULT;
> +		goto fail_map_sg;
> +	}
> +
> +	buf->dma_addr = sg_dma_address(sgt->sgl);
> 	buf->size = size;
> -	buf->dma_addr = dma_addr;
> -	buf->vma = vma;
> +	buf->dma_sgt = sgt;
> +
> +	atomic_inc(&buf->refcount);
> 
> 	return buf;
> +
> +fail_map_sg:
> +	dma_unmap_sg(buf->dev, sgt->sgl, sgt->nents, buf->dma_dir);

I think this will break in the VM_PFNMAP case on non-coherent
architectures. arm_dma_unmap_page() will call __dma_page_dev_to_cpu() in
that case, which can dereference struct page. As explained above, the
struct page isn't valid with VM_PFNMAP. I haven't checked the dma_map_sg()
and dma_sync_sg_*() calls, but chances are they might break as well.

> +
> +fail_sgt:
> +	vb2_dc_put_sgtable(sgt, 0);
> +
> +fail_get_pages:
> +	while (pages && n_pages)
> +		put_page(pages[--n_pages]);
> +	vb2_put_vma(buf->vma);
> +
> +fail_pages:
> +	kfree(pages); /* kfree is NULL-proof */
> +
> +fail_buf:
> +	kfree(buf);
> +
> +	return ERR_PTR(ret);
>  }
> 
> -static void vb2_dc_put_userptr(void *mem_priv)
> +static void vb2_dc_put_userptr(void *buf_priv)
>  {
> -	struct vb2_dc_buf *buf = mem_priv;
> -
> -	if (!buf)
> -		return;
> +	struct vb2_dc_buf *buf = buf_priv;
> +	struct sg_table *sgt = buf->dma_sgt;
> 
> +	dma_unmap_sg(buf->dev, sgt->sgl, sgt->orig_nents, buf->dma_dir);
> +	vb2_dc_put_sgtable(sgt, !vma_is_io(buf->vma));
> 	vb2_put_vma(buf->vma);
> 	kfree(buf);
>  }
> @@ -168,6 +496,8 @@ const struct vb2_mem_ops vb2_dma_contig_memops = {
> 	.mmap = vb2_dc_mmap,
> 	.get_userptr = vb2_dc_get_userptr,
> 	.put_userptr = vb2_dc_put_userptr,
> +	.prepare = vb2_dc_prepare,
> +	.finish = vb2_dc_finish,
> 	.num_users = vb2_dc_num_users,
> };
> EXPORT_SYMBOL_GPL(vb2_dma_contig_memops);

-- 
Regards,

Laurent Pinchart