At Thu, 21 Aug 2008 08:55:12 -0500,
James Bottomley wrote:
> 
> On Thu, 2008-08-21 at 12:19 +0200, Takashi Iwai wrote:
> > At Wed, 20 Aug 2008 12:58:08 -0500,
> > James Bottomley wrote:
> > > 
> > > On Wed, 2008-08-20 at 18:53 +0200, Takashi Iwai wrote:
> > > > > I'm afraid there are several problems.  The first is that it doesn't do
> > > > > what you want.  You can't map a coherent page to userspace (which is at
> > > > > a non-congruent address on parisc) and still expect it to be
> > > > > coherent ... there's going to have to be fiddling with the page table
> > > > > caches to make sure coherency isn't destroyed by aliasing effects.
> > > > 
> > > > Hmm... how bad would the coherency be with such a simple mmap method?
> > > > In most cases, we don't need "perfect" coherency.  Usually one
> > > > process mmaps the whole buffer and keeps reading/writing.  There is
> > > > another use case (sharing the mmapped buffer among multiple processes),
> > > > but that can be disabled if we know beforehand that it's not feasible.
> > > 
> > > Unfortunately, the incoherency is between the user and the kernel.
> > > That's where the aliasing effects occur, so realistically, even though
> > > you've mapped coherent memory to the user, the coherency of that memory
> > > is only device <-> kernel.  When any single user space process
> > > writes to it, the device won't see the write unless the user issues a
> > > flush.
> > 
> > I see.  In the case of ALSA mmap mode, the user issues an ioctl to
> > notify the kernel after each read/write access, so it would be
> > relatively easy to add a sync operation there.
> > 
> > Would a call to dma_sync_*_for_device() suffice for that purpose?
> 
> No ... dma_sync_* syncs from the kernel to the device ... you don't
> need that if the memory is already coherent.
> 
> The problem is that the data you want the device to see is held in a
> cache above the user space mapping ... it's that cache that has to be
> flushed (i.e. you need to flush through the user mappings, not through
> the kernel ones).
> 
> > (BTW, how does the fb driver handle this?)
> 
> It sets the shared page to uncached on all its mappings.  Turning off
> caching (assuming the platform can do it ... not all can) is a good way
> to eliminate aliasing issues.
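[For illustration only: the fbdev-style approach described above amounts
to marking the user mapping uncacheable before remapping the pages.  A
minimal sketch of such an mmap handler follows -- the handler name and
buf_phys are hypothetical; on parisc, pgprot_noncached() ORs in
_PAGE_NO_CACHE where the hardware supports it:]

#include <linux/fs.h>
#include <linux/mm.h>

static unsigned long buf_phys;	/* hypothetical: physical address of the
				 * shared buffer, set at allocation time */

static int example_mmap_uncached(struct file *file,
				 struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	/* Disable caching on the user mapping so that no dirty cache
	 * line can alias the device's (or kernel's) view of the buffer. */
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	return remap_pfn_range(vma, vma->vm_start, buf_phys >> PAGE_SHIFT,
			       size, vma->vm_page_prot);
}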
Thanks for the clarification.  How about the revised patch below (for
PARISC)?


Takashi

diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index ccd61b9..680b075 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -545,6 +545,20 @@ static void pa11_dma_sync_sg_for_device(struct device *dev, struct scatterlist *
 	flush_kernel_dcache_range(sg_virt_addr(sglist), sglist->length);
 }
 
+static int pa11_dma_mmap_coherent(struct device *dev,
+				  struct vm_area_struct *vma,
+				  void *cpu_addr, dma_addr_t handle,
+				  size_t size)
+{
+	struct page *pg;
+	pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+	cpu_addr = __va(handle);
+	pg = virt_to_page(cpu_addr);
+	return remap_pfn_range(vma, vma->vm_start,
+			       page_to_pfn(pg) + vma->vm_pgoff,
+			       size, vma->vm_page_prot);
+}
+
 struct hppa_dma_ops pcxl_dma_ops = {
 	.dma_supported =	pa11_dma_supported,
 	.alloc_consistent =	pa11_dma_alloc_consistent,
@@ -558,6 +572,7 @@ struct hppa_dma_ops pcxl_dma_ops = {
 	.dma_sync_single_for_device =	pa11_dma_sync_single_for_device,
 	.dma_sync_sg_for_cpu =		pa11_dma_sync_sg_for_cpu,
 	.dma_sync_sg_for_device =	pa11_dma_sync_sg_for_device,
+	.mmap_coherent =	pa11_dma_mmap_coherent,
 };
 
 static void *fail_alloc_consistent(struct device *dev, size_t size,
@@ -598,4 +613,5 @@ struct hppa_dma_ops pcx_dma_ops = {
 	.dma_sync_single_for_device =	pa11_dma_sync_single_for_device,
 	.dma_sync_sg_for_cpu =		pa11_dma_sync_sg_for_cpu,
 	.dma_sync_sg_for_device =	pa11_dma_sync_sg_for_device,
+	.mmap_coherent =	pa11_dma_mmap_coherent,
 };
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index b30e38f..dd2ab2c 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -1014,6 +1014,19 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
 	DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents);
 }
 
+static int ccio_dma_mmap_coherent(struct device *dev,
+				  struct vm_area_struct *vma,
+				  void *cpu_addr, dma_addr_t handle,
+				  size_t size)
+{
+	struct page *pg;
+	pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+	pg = virt_to_page(cpu_addr);
+	return remap_pfn_range(vma, vma->vm_start,
+			       page_to_pfn(pg) + vma->vm_pgoff,
+			       size, vma->vm_page_prot);
+}
+
 static struct hppa_dma_ops ccio_ops = {
 	.dma_supported =	ccio_dma_supported,
 	.alloc_consistent =	ccio_alloc_consistent,
@@ -1027,6 +1040,7 @@ static struct hppa_dma_ops ccio_ops = {
 	.dma_sync_single_for_device =	NULL,	/* NOP for U2/Uturn */
 	.dma_sync_sg_for_cpu =		NULL,	/* ditto */
 	.dma_sync_sg_for_device =	NULL,	/* ditto */
+	.mmap_coherent =	ccio_dma_mmap_coherent,
 };
 
 #ifdef CONFIG_PROC_FS
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index bc73b96..403d66d 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -1057,6 +1057,19 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
 }
 
 
+static int sba_dma_mmap_coherent(struct device *dev,
+				 struct vm_area_struct *vma,
+				 void *cpu_addr, dma_addr_t handle,
+				 size_t size)
+{
+	struct page *pg;
+	pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+	pg = virt_to_page(cpu_addr);
+	return remap_pfn_range(vma, vma->vm_start,
+			       page_to_pfn(pg) + vma->vm_pgoff,
+			       size, vma->vm_page_prot);
+}
+
 static struct hppa_dma_ops sba_ops = {
 	.dma_supported =	sba_dma_supported,
 	.alloc_consistent =	sba_alloc_consistent,
@@ -1070,6 +1083,7 @@ static struct hppa_dma_ops sba_ops = {
 	.dma_sync_single_for_device =	NULL,
 	.dma_sync_sg_for_cpu =		NULL,
 	.dma_sync_sg_for_device =	NULL,
+	.mmap_coherent =	sba_dma_mmap_coherent,
 };
 
 
diff --git a/include/asm-parisc/dma-mapping.h b/include/asm-parisc/dma-mapping.h
index 53af696..5b357b3 100644
--- a/include/asm-parisc/dma-mapping.h
+++ b/include/asm-parisc/dma-mapping.h
@@ -19,6 +19,9 @@ struct hppa_dma_ops {
 	void (*dma_sync_single_for_device)(struct device *dev, dma_addr_t iova, unsigned long offset, size_t size, enum dma_data_direction direction);
 	void (*dma_sync_sg_for_cpu)(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction);
 	void (*dma_sync_sg_for_device)(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction);
+	int (*mmap_coherent)(struct device *dev, struct vm_area_struct *vma,
+			     void *cpu_addr, dma_addr_t handle, size_t size);
+
 };
 
 /*
@@ -204,6 +207,15 @@ dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 	flush_kernel_dcache_range((unsigned long)vaddr, size);
 }
 
+static inline int
+dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
+		  void *cpu_addr, dma_addr_t handle, size_t size)
+{
+	if (!hppa_dma_ops->mmap_coherent)
+		return -ENXIO;
+	return hppa_dma_ops->mmap_coherent(dev, vma, cpu_addr, handle, size);
+}
+
 static inline void *
 parisc_walk_tree(struct device *dev)
 {
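[For context, a minimal sketch of how a driver could sit on top of the
interface added above -- struct example_buf and example_mmap() are
hypothetical names, and the buffer is assumed to come from
dma_alloc_coherent():]

#include <linux/fs.h>
#include <linux/mm.h>
#include <asm/dma-mapping.h>

/* hypothetical per-device state kept by the driver */
struct example_buf {
	struct device *dev;
	void *cpu_addr;		/* from dma_alloc_coherent() */
	dma_addr_t dma_handle;
};

static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct example_buf *buf = file->private_data;

	/* Map the coherent buffer to userspace; on parisc this routes
	 * through the mmap_coherent op, which marks the pages uncached. */
	return dma_mmap_coherent(buf->dev, vma, buf->cpu_addr,
				 buf->dma_handle,
				 vma->vm_end - vma->vm_start);
}

[Since the wrapper returns -ENXIO when no mmap_coherent op is provided,
a caller can fall back to plain read/write transfers instead of mmap.]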