Please ignore, sent to wrong list. > -----Original Message----- > From: Dylan Yip <dylan.yip@xxxxxxxxxx> > Sent: Tuesday, September 17, 2019 1:21 PM > To: linux-media@xxxxxxxxxxxxxxx; Satish Kumar Nagireddy > <SATISHNA@xxxxxxxxxx> > Cc: Dylan Yip <dylany@xxxxxxxxxx> > Subject: [LINUX PATCH] dma-mapping: Control memset operation using gfp > flags > > In the case of a 4K video buffer, allocation from reserved memory takes a > long time, ~500ms. This has been root-caused to the memset() operations on the > allocated memory, which consume many CPU cycles. > Due to this delay, we see that initial frames are being dropped. > > To fix this, we have wrapped the default memset, done when allocating > coherent memory, under the __GFP_ZERO flag. So, we only clear allocated > memory if the __GFP_ZERO flag is set. We believe this should be safe, as the > video decoder always writes before reading. > This optimizes decoder initialization, as we do not set the __GFP_ZERO flag > when allocating memory for the decoder. With this optimization, we don't see > initial frame drops, and decoder initialization time is ~100ms. > > This patch adds plumbing through the dma_alloc functions to pass the gfp flags set by > the caller down to __dma_alloc_from_coherent(). There, the gfp flags are checked for > __GFP_ZERO. If present, we memset the buffer to 0; otherwise, we skip the > memset. 
> > Signed-off-by: Dylan Yip <dylan.yip@xxxxxxxxxx> > --- > arch/arm/mm/dma-mapping-nommu.c | 2 +- > include/linux/dma-mapping.h | 11 +++++++---- > kernel/dma/coherent.c | 15 +++++++++------ > kernel/dma/mapping.c | 2 +- > 4 files changed, 18 insertions(+), 12 deletions(-) > > diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma- > mapping-nommu.c index 52b8255..242b2c3 100644 > --- a/arch/arm/mm/dma-mapping-nommu.c > +++ b/arch/arm/mm/dma-mapping-nommu.c > @@ -35,7 +35,7 @@ static void *arm_nommu_dma_alloc(struct device *dev, > size_t size, > unsigned long attrs) > > { > - void *ret = dma_alloc_from_global_coherent(size, dma_handle); > + void *ret = dma_alloc_from_global_coherent(size, dma_handle, > gfp); > > /* > * dma_alloc_from_global_coherent() may fail because: > diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h > index f7d1eea..b715c9f 100644 > --- a/include/linux/dma-mapping.h > +++ b/include/linux/dma-mapping.h > @@ -160,24 +160,27 @@ static inline int is_device_dma_capable(struct > device *dev) > * Don't use them in device drivers. 
> */ > int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, > - dma_addr_t *dma_handle, void **ret); > + dma_addr_t *dma_handle, void **ret, > + gfp_t flag); > int dma_release_from_dev_coherent(struct device *dev, int order, void > *vaddr); > > int dma_mmap_from_dev_coherent(struct device *dev, struct > vm_area_struct *vma, > void *cpu_addr, size_t size, int *ret); > > -void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t > *dma_handle); > +void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t > *dma_handle, > + gfp_t flag); > int dma_release_from_global_coherent(int order, void *vaddr); int > dma_mmap_from_global_coherent(struct vm_area_struct *vma, void > *cpu_addr, > size_t size, int *ret); > > #else > -#define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0) > +#define dma_alloc_from_dev_coherent(dev, size, handle, ret, flag) (0) > #define dma_release_from_dev_coherent(dev, order, vaddr) (0) #define > dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0) > > static inline void *dma_alloc_from_global_coherent(ssize_t size, > - dma_addr_t *dma_handle) > + dma_addr_t *dma_handle, > + gfp_t flag) > { > return NULL; > } > diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index > 29fd659..d85fab5 100644 > --- a/kernel/dma/coherent.c > +++ b/kernel/dma/coherent.c > @@ -136,7 +136,7 @@ void dma_release_declared_memory(struct device > *dev) EXPORT_SYMBOL(dma_release_declared_memory); > > static void *__dma_alloc_from_coherent(struct dma_coherent_mem > *mem, > - ssize_t size, dma_addr_t *dma_handle) > + ssize_t size, dma_addr_t *dma_handle, gfp_t gfp_flag) > { > int order = get_order(size); > unsigned long flags; > @@ -158,7 +158,8 @@ static void *__dma_alloc_from_coherent(struct > dma_coherent_mem *mem, > *dma_handle = mem->device_base + (pageno << PAGE_SHIFT); > ret = mem->virt_base + (pageno << PAGE_SHIFT); > spin_unlock_irqrestore(&mem->spinlock, flags); > - memset(ret, 0, size); > + if (gfp_flag & __GFP_ZERO) > + 
memset(ret, 0, size); > return ret; > err: > spin_unlock_irqrestore(&mem->spinlock, flags); @@ -172,6 +173,7 > @@ static void *__dma_alloc_from_coherent(struct dma_coherent_mem > *mem, > * @dma_handle: This will be filled with the correct dma handle > * @ret: This pointer will be filled with the virtual address > * to allocated area. > + * @flag: gfp flag set by user > * > * This function should be only called from per-arch dma_alloc_coherent() > * to support allocation from per-device coherent memory pools. > @@ -180,24 +182,25 @@ static void *__dma_alloc_from_coherent(struct > dma_coherent_mem *mem, > * generic memory areas, or !0 if dma_alloc_coherent should return @ret. > */ > int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, > - dma_addr_t *dma_handle, void **ret) > + dma_addr_t *dma_handle, void **ret, gfp_t flag) > { > struct dma_coherent_mem *mem = > dev_get_coherent_memory(dev); > > if (!mem) > return 0; > > - *ret = __dma_alloc_from_coherent(mem, size, dma_handle); > + *ret = __dma_alloc_from_coherent(mem, size, dma_handle, flag); > return 1; > } > > -void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t > *dma_handle) > +void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t > *dma_handle, > + gfp_t flag) > { > if (!dma_coherent_default_memory) > return NULL; > > return > __dma_alloc_from_coherent(dma_coherent_default_memory, size, > - dma_handle); > + dma_handle, flag); > } > > static int __dma_release_from_coherent(struct dma_coherent_mem > *mem, diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index > b0038ca..bfea1d2 100644 > --- a/kernel/dma/mapping.c > +++ b/kernel/dma/mapping.c > @@ -272,7 +272,7 @@ void *dma_alloc_attrs(struct device *dev, size_t size, > dma_addr_t *dma_handle, > > WARN_ON_ONCE(!dev->coherent_dma_mask); > > - if (dma_alloc_from_dev_coherent(dev, size, dma_handle, > &cpu_addr)) > + if (dma_alloc_from_dev_coherent(dev, size, dma_handle, > &cpu_addr, > +flag)) > return cpu_addr; > > /* let the 
implementation decide on the zone to allocate from: */ > -- > 2.7.4