On 05/17/2012 07:54 PM, Marek Szyprowski wrote: > This patch changes dma-mapping subsystem to use generic vmalloc areas > for all consistent dma allocations. This increases the total size limit > of the consistent allocations and removes platform hacks and a lot of > duplicated code. > I like this patch very much! There are just small nitpicks below. > Atomic allocations are served from special pool preallocated on boot, > becasue vmalloc areas cannot be reliably created in atomic context. Typo: "becasue" should be "because". > > Signed-off-by: Marek Szyprowski <m.szyprowski@xxxxxxxxxxx> > --- > Documentation/kernel-parameters.txt | 4 + > arch/arm/include/asm/dma-mapping.h | 2 +- > arch/arm/mm/dma-mapping.c | 360 ++++++++++++++++------------------- > 3 files changed, 171 insertions(+), 195 deletions(-) > > diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt > index c1601e5..ba58f50 100644 > --- a/Documentation/kernel-parameters.txt > +++ b/Documentation/kernel-parameters.txt > @@ -515,6 +515,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. > a hypervisor. > Default: yes > > + coherent_pool=nn[KMG] [ARM,KNL] > + Sets the size of memory pool for coherent, atomic dma > + allocations. > + > code_bytes [X86] How many bytes of object code to print > in an oops report. > Range: 0 - 8192 > diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h > index cb3b7c9..92b0afb 100644 > --- a/arch/arm/include/asm/dma-mapping.h > +++ b/arch/arm/include/asm/dma-mapping.h > @@ -210,7 +210,7 @@ int dma_mmap_writecombine(struct device *, struct vm_area_struct *, > * DMA region above it's default value of 2MB. It must be called before the > * memory allocator is initialised, i.e. before any core_initcall. 
> */ > -extern void __init init_consistent_dma_size(unsigned long size); > +static inline void init_consistent_dma_size(unsigned long size) { } > > > #ifdef CONFIG_DMABOUNCE > diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c > index db23ae4..3be4de2 100644 > --- a/arch/arm/mm/dma-mapping.c > +++ b/arch/arm/mm/dma-mapping.c > @@ -19,6 +19,8 @@ > #include <linux/dma-mapping.h> > #include <linux/highmem.h> > #include <linux/slab.h> > +#include <linux/io.h> > +#include <linux/vmalloc.h> > > #include <asm/memory.h> > #include <asm/highmem.h> > @@ -119,210 +121,178 @@ static void __dma_free_buffer(struct page *page, size_t size) > } > > #ifdef CONFIG_MMU > - > -#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - consistent_base) >> PAGE_SHIFT) > -#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - consistent_base) >> PMD_SHIFT) > - > -/* > - * These are the page tables (2MB each) covering uncached, DMA consistent allocations > - */ > -static pte_t **consistent_pte; > - > -#define DEFAULT_CONSISTENT_DMA_SIZE SZ_2M > - > -unsigned long consistent_base = CONSISTENT_END - DEFAULT_CONSISTENT_DMA_SIZE; > - > -void __init init_consistent_dma_size(unsigned long size) > -{ > - unsigned long base = CONSISTENT_END - ALIGN(size, SZ_2M); > - > - BUG_ON(consistent_pte); /* Check we're called before DMA region init */ > - BUG_ON(base < VMALLOC_END); > - > - /* Grow region to accommodate specified size */ > - if (base < consistent_base) > - consistent_base = base; > -} > - > -#include "vmregion.h" > - > -static struct arm_vmregion_head consistent_head = { > - .vm_lock = __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock), > - .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), > - .vm_end = CONSISTENT_END, > -}; > - > #ifdef CONFIG_HUGETLB_PAGE > #error ARM Coherent DMA allocator does not (yet) support huge TLB > #endif > > -/* > - * Initialise the consistent memory allocation. 
> - */ > -static int __init consistent_init(void) > -{ > - int ret = 0; > - pgd_t *pgd; > - pud_t *pud; > - pmd_t *pmd; > - pte_t *pte; > - int i = 0; > - unsigned long base = consistent_base; > - unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT; > - > - consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL); > - if (!consistent_pte) { > - pr_err("%s: no memory\n", __func__); > - return -ENOMEM; > - } > - > - pr_debug("DMA memory: 0x%08lx - 0x%08lx:\n", base, CONSISTENT_END); > - consistent_head.vm_start = base; > - > - do { > - pgd = pgd_offset(&init_mm, base); > - > - pud = pud_alloc(&init_mm, pgd, base); > - if (!pud) { > - printk(KERN_ERR "%s: no pud tables\n", __func__); > - ret = -ENOMEM; > - break; > - } > - > - pmd = pmd_alloc(&init_mm, pud, base); > - if (!pmd) { > - printk(KERN_ERR "%s: no pmd tables\n", __func__); > - ret = -ENOMEM; > - break; > - } > - WARN_ON(!pmd_none(*pmd)); > - > - pte = pte_alloc_kernel(pmd, base); > - if (!pte) { > - printk(KERN_ERR "%s: no pte tables\n", __func__); > - ret = -ENOMEM; > - break; > - } > - > - consistent_pte[i++] = pte; > - base += PMD_SIZE; > - } while (base < CONSISTENT_END); > - > - return ret; > -} > - > -core_initcall(consistent_init); > - > static void * > __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, > const void *caller) > { > - struct arm_vmregion *c; > - size_t align; > - int bit; > + struct vm_struct *area; > + unsigned long addr; > > - if (!consistent_pte) { > - printk(KERN_ERR "%s: not initialised\n", __func__); > + area = get_vm_area_caller(size, VM_DMA | VM_USERMAP, caller); Out of curiosity: do we always map the DMA area into the user's address space? 
> + if (!area) > + return NULL; > + addr = (unsigned long)area->addr; > + area->phys_addr = __pfn_to_phys(page_to_pfn(page)); > + > + if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) { > + vunmap((void *)addr); > + return NULL; > + } > + return (void *)addr; > +} > + > +static void __dma_free_remap(void *cpu_addr, size_t size) > +{ > + struct vm_struct *area; > + > + read_lock(&vmlist_lock); Why do we need vmlist_lock? > + area = find_vm_area(cpu_addr); find_vm_area only checks vmalloc'ed regions, so we need an additional check: if (!area || !(area->flags & VM_DMA)) > + if (!area) { > + pr_err("%s: trying to free invalid coherent area: %p\n", > + __func__, cpu_addr); > + dump_stack(); > + read_unlock(&vmlist_lock); > + return; > + } > + unmap_kernel_range((unsigned long)cpu_addr, size); > + read_unlock(&vmlist_lock); > + vunmap(cpu_addr); > +} > + > +struct dma_pool { > + size_t size; > + spinlock_t lock; > + unsigned long *bitmap; > + unsigned long count; Nitpick: what does count mean? nr_pages? > + void *vaddr; > + struct page *page; > +}; > + > +static struct dma_pool atomic_pool = { > + .size = SZ_256K, > +}; AFAIU, we could set it to 2M, but you are reducing it to 256K. What's the justification for that default value? > + > +static int __init early_coherent_pool(char *p) > +{ > + atomic_pool.size = memparse(p, &p); > + return 0; > +} > +early_param("coherent_pool", early_coherent_pool); > + > +/* > + * Initialise the coherent pool for atomic allocations. 
> + */ > +static int __init atomic_pool_init(void) > +{ > + struct dma_pool *pool = &atomic_pool; > + pgprot_t prot = pgprot_dmacoherent(pgprot_kernel); > + unsigned long count = pool->size >> PAGE_SHIFT; > + gfp_t gfp = GFP_KERNEL | GFP_DMA; > + unsigned long *bitmap; > + struct page *page; > + void *ptr; > + int bitmap_size = BITS_TO_LONGS(count) * sizeof(long); > + > + bitmap = kzalloc(bitmap_size, GFP_KERNEL); > + if (!bitmap) > + goto no_bitmap; > + > + page = __dma_alloc_buffer(NULL, pool->size, gfp); > + if (!page) > + goto no_page; > + > + ptr = __dma_alloc_remap(page, pool->size, gfp, prot, NULL); > + if (ptr) { > + spin_lock_init(&pool->lock); > + pool->vaddr = ptr; > + pool->page = page; > + pool->bitmap = bitmap; > + pool->count = count; > + pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n", > + (unsigned)pool->size / 1024); > + return 0; > + } > + > + __dma_free_buffer(page, pool->size); > +no_page: > + kfree(bitmap); > +no_bitmap: > + pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", > + (unsigned)pool->size / 1024); > + return -ENOMEM; > +} > +core_initcall(atomic_pool_init); > + > +static void *__alloc_from_pool(size_t size, struct page **ret_page) > +{ > + struct dma_pool *pool = &atomic_pool; > + unsigned int count = size >> PAGE_SHIFT; > + unsigned int pageno; > + unsigned long flags; > + void *ptr = NULL; > + size_t align; > + > + if (!pool->vaddr) { > + pr_err("%s: coherent pool not initialised!\n", __func__); > dump_stack(); > return NULL; > } > > /* > - * Align the virtual region allocation - maximum alignment is > - * a section size, minimum is a page size. This helps reduce > - * fragmentation of the DMA space, and also prevents allocations > - * smaller than a section from crossing a section boundary. > + * Align the region allocation - allocations from pool are rather > + * small, so align them to their order in pages, minimum is a page > + * size. 
This helps reduce fragmentation of the DMA space. > */ > - bit = fls(size - 1); > - if (bit > SECTION_SHIFT) > - bit = SECTION_SHIFT; > - align = 1 << bit; > + align = PAGE_SIZE << get_order(size); > > - /* > - * Allocate a virtual address in the consistent mapping region. > - */ > - c = arm_vmregion_alloc(&consistent_head, align, size, > - gfp & ~(__GFP_DMA | __GFP_HIGHMEM), caller); > - if (c) { > - pte_t *pte; > - int idx = CONSISTENT_PTE_INDEX(c->vm_start); > - u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); > - > - pte = consistent_pte[idx] + off; > - c->vm_pages = page; > - > - do { > - BUG_ON(!pte_none(*pte)); > - > - set_pte_ext(pte, mk_pte(page, prot), 0); > - page++; > - pte++; > - off++; > - if (off >= PTRS_PER_PTE) { > - off = 0; > - pte = consistent_pte[++idx]; > - } > - } while (size -= PAGE_SIZE); > - > - dsb(); > - > - return (void *)c->vm_start; > + spin_lock_irqsave(&pool->lock, flags); > + pageno = bitmap_find_next_zero_area(pool->bitmap, pool->count, > + 0, count, (1 << align) - 1); > + if (pageno < pool->count) { > + bitmap_set(pool->bitmap, pageno, count); > + ptr = pool->vaddr + PAGE_SIZE * pageno; > + *ret_page = pool->page + pageno; > } > - return NULL; > + spin_unlock_irqrestore(&pool->lock, flags); > + > + return ptr; > } > > -static void __dma_free_remap(void *cpu_addr, size_t size) > +static int __free_from_pool(void *start, size_t size) > { > - struct arm_vmregion *c; > - unsigned long addr; > - pte_t *ptep; > - int idx; > - u32 off; > + struct dma_pool *pool = &atomic_pool; > + unsigned long pageno, count; > + unsigned long flags; > > - c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); > - if (!c) { > - printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", > - __func__, cpu_addr); > + if (start < pool->vaddr || start > pool->vaddr + pool->size) > + return 0; > + > + if (start + size > pool->vaddr + pool->size) { > + pr_err("%s: freeing wrong coherent size from pool\n", __func__); > 
dump_stack(); > - return; > + return 0; > } > > - if ((c->vm_end - c->vm_start) != size) { > - printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", > - __func__, c->vm_end - c->vm_start, size); > - dump_stack(); > - size = c->vm_end - c->vm_start; > - } > + pageno = (start - pool->vaddr) >> PAGE_SHIFT; > + count = size >> PAGE_SHIFT; > > - idx = CONSISTENT_PTE_INDEX(c->vm_start); > - off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); > - ptep = consistent_pte[idx] + off; > - addr = c->vm_start; > - do { > - pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); > + spin_lock_irqsave(&pool->lock, flags); > + bitmap_clear(pool->bitmap, pageno, count); > + spin_unlock_irqrestore(&pool->lock, flags); > > - ptep++; > - addr += PAGE_SIZE; > - off++; > - if (off >= PTRS_PER_PTE) { > - off = 0; > - ptep = consistent_pte[++idx]; > - } > - > - if (pte_none(pte) || !pte_present(pte)) > - printk(KERN_CRIT "%s: bad page in kernel page table\n", > - __func__); > - } while (size -= PAGE_SIZE); > - > - flush_tlb_kernel_range(c->vm_start, c->vm_end); > - > - arm_vmregion_free(&consistent_head, c); > + return 1; > } > > #else /* !CONFIG_MMU */ > > #define __dma_alloc_remap(page, size, gfp, prot, c) page_address(page) > #define __dma_free_remap(addr, size) do { } while (0) > +#define __alloc_from_pool(size, ret_page) NULL > +#define __free_from_pool(addr, size) 0 > > #endif /* CONFIG_MMU */ > > @@ -345,6 +315,16 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, > *handle = ~0; > size = PAGE_ALIGN(size); > > + /* > + * Atomic allocations need special handling > + */ > + if (gfp & GFP_ATOMIC && !arch_is_coherent()) { > + addr = __alloc_from_pool(size, &page); > + if (addr) > + *handle = pfn_to_dma(dev, page_to_pfn(page)); > + return addr; > + } > + > page = __dma_alloc_buffer(dev, size, gfp); > if (!page) > return NULL; > @@ -398,24 +378,16 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma, > { > int ret = -ENXIO; > 
#ifdef CONFIG_MMU > - unsigned long user_size, kern_size; > - struct arm_vmregion *c; > + unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; > + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; > + unsigned long pfn = dma_to_pfn(dev, dma_addr); > + unsigned long off = vma->vm_pgoff; > > - user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; > - > - c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); > - if (c) { > - unsigned long off = vma->vm_pgoff; > - > - kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT; > - > - if (off < kern_size && > - user_size <= (kern_size - off)) { > - ret = remap_pfn_range(vma, vma->vm_start, > - page_to_pfn(c->vm_pages) + off, > - user_size << PAGE_SHIFT, > - vma->vm_page_prot); > - } > + if (off < count && user_count <= (count - off)) { > + ret = remap_pfn_range(vma, vma->vm_start, > + pfn + off, > + user_count << PAGE_SHIFT, > + vma->vm_page_prot); > } > #endif /* CONFIG_MMU */ > > @@ -444,13 +416,16 @@ EXPORT_SYMBOL(dma_mmap_writecombine); > */ > void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) > { > - WARN_ON(irqs_disabled()); > - > if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) > return; > > size = PAGE_ALIGN(size); > > + if (__free_from_pool(cpu_addr, size)) > + return; > + > + WARN_ON(irqs_disabled()); > + > if (!arch_is_coherent()) > __dma_free_remap(cpu_addr, size); > > @@ -726,9 +701,6 @@ EXPORT_SYMBOL(dma_set_mask); > > static int __init dma_debug_do_init(void) > { > -#ifdef CONFIG_MMU > - arm_vmregion_create_proc("dma-mappings", &consistent_head); > -#endif > dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); > return 0; > } -- Kind regards, Minchan Kim -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/ Don't email: <a href="mailto:dont@xxxxxxxxx">email@xxxxxxxxx</a>