RE: [LINUX PATCH] dma-mapping: Control memset operation using gfp flags

Please ignore, sent to wrong list.

> -----Original Message-----
> From: Dylan Yip <dylan.yip@xxxxxxxxxx>
> Sent: Tuesday, September 17, 2019 1:21 PM
> To: linux-media@xxxxxxxxxxxxxxx; Satish Kumar Nagireddy
> <SATISHNA@xxxxxxxxxx>
> Cc: Dylan Yip <dylany@xxxxxxxxxx>
> Subject: [LINUX PATCH] dma-mapping: Control memset operation using gfp
> flags
> 
> In the case of a 4K video buffer, allocation from reserved memory takes a
> long time, ~500ms. This was root-caused to the memset() operation on the
> allocated memory, which consumes a large number of CPU cycles.
> Because of this delay, the initial frames are dropped.
> 
> To fix this, the default memset done when allocating coherent memory is now
> wrapped under the __GFP_ZERO flag, so allocated memory is cleared only when
> __GFP_ZERO is set. We believe this is safe because the video decoder always
> writes before reading. This speeds up decoder initialization, as we do not
> set __GFP_ZERO when allocating memory for the decoder. With this
> optimization we no longer see initial frame drops, and decoder
> initialization time is ~100ms.
> 
> This patch adds plumbing through the dma_alloc functions to pass the gfp
> flags set by the caller down to __dma_alloc_from_coherent(), where the
> flags are checked for __GFP_ZERO. If it is set, the buffer is memset to 0;
> otherwise the memset is skipped.
> 
> Signed-off-by: Dylan Yip <dylan.yip@xxxxxxxxxx>
> ---
>  arch/arm/mm/dma-mapping-nommu.c |  2 +-
>  include/linux/dma-mapping.h     | 11 +++++++----
>  kernel/dma/coherent.c           | 15 +++++++++------
>  kernel/dma/mapping.c            |  2 +-
>  4 files changed, 18 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
> index 52b8255..242b2c3 100644
> --- a/arch/arm/mm/dma-mapping-nommu.c
> +++ b/arch/arm/mm/dma-mapping-nommu.c
> @@ -35,7 +35,7 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size,
>  				 unsigned long attrs)
> 
>  {
> -	void *ret = dma_alloc_from_global_coherent(size, dma_handle);
> +	void *ret = dma_alloc_from_global_coherent(size, dma_handle, gfp);
> 
>  	/*
>  	 * dma_alloc_from_global_coherent() may fail because:
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index f7d1eea..b715c9f 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -160,24 +160,27 @@ static inline int is_device_dma_capable(struct device *dev)
>   * Don't use them in device drivers.
>   */
>  int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
> -				       dma_addr_t *dma_handle, void **ret);
> +				       dma_addr_t *dma_handle, void **ret,
> +				       gfp_t flag);
>  int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr);
> 
>  int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma,
>  			    void *cpu_addr, size_t size, int *ret);
> 
> -void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle);
> +void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle,
> +				     gfp_t flag);
>  int dma_release_from_global_coherent(int order, void *vaddr);
>  int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr,
>  				  size_t size, int *ret);
> 
>  #else
> -#define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0)
> +#define dma_alloc_from_dev_coherent(dev, size, handle, ret, flag) (0)
>  #define dma_release_from_dev_coherent(dev, order, vaddr) (0)
>  #define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0)
> 
>  static inline void *dma_alloc_from_global_coherent(ssize_t size,
> -						   dma_addr_t *dma_handle)
> +						   dma_addr_t *dma_handle,
> +						   gfp_t flag)
>  {
>  	return NULL;
>  }
> diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
> index 29fd659..d85fab5 100644
> --- a/kernel/dma/coherent.c
> +++ b/kernel/dma/coherent.c
> @@ -136,7 +136,7 @@ void dma_release_declared_memory(struct device *dev)
>  EXPORT_SYMBOL(dma_release_declared_memory);
> 
>  static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
> -		ssize_t size, dma_addr_t *dma_handle)
> +		ssize_t size, dma_addr_t *dma_handle, gfp_t gfp_flag)
>  {
>  	int order = get_order(size);
>  	unsigned long flags;
> @@ -158,7 +158,8 @@ static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
>  	*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
>  	ret = mem->virt_base + (pageno << PAGE_SHIFT);
>  	spin_unlock_irqrestore(&mem->spinlock, flags);
> -	memset(ret, 0, size);
> +	if (gfp_flag & __GFP_ZERO)
> +		memset(ret, 0, size);
>  	return ret;
>  err:
>  	spin_unlock_irqrestore(&mem->spinlock, flags);
> @@ -172,6 +173,7 @@ static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
>   * @dma_handle:	This will be filled with the correct dma handle
>   * @ret:	This pointer will be filled with the virtual address
>   *		to allocated area.
> + * @flag:      gfp flag set by user
>   *
>   * This function should be only called from per-arch dma_alloc_coherent()
>   * to support allocation from per-device coherent memory pools.
> @@ -180,24 +182,25 @@ static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
>   * generic memory areas, or !0 if dma_alloc_coherent should return @ret.
>   */
>  int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
> -		dma_addr_t *dma_handle, void **ret)
> +		dma_addr_t *dma_handle, void **ret, gfp_t flag)
>  {
>  	struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
> 
>  	if (!mem)
>  		return 0;
> 
> -	*ret = __dma_alloc_from_coherent(mem, size, dma_handle);
> +	*ret = __dma_alloc_from_coherent(mem, size, dma_handle, flag);
>  	return 1;
>  }
> 
> -void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle)
> +void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle,
> +				     gfp_t flag)
>  {
>  	if (!dma_coherent_default_memory)
>  		return NULL;
> 
>  	return __dma_alloc_from_coherent(dma_coherent_default_memory, size,
> -			dma_handle);
> +			dma_handle, flag);
>  }
> 
>  static int __dma_release_from_coherent(struct dma_coherent_mem *mem,
> diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
> index b0038ca..bfea1d2 100644
> --- a/kernel/dma/mapping.c
> +++ b/kernel/dma/mapping.c
> @@ -272,7 +272,7 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
> 
>  	WARN_ON_ONCE(!dev->coherent_dma_mask);
> 
> -	if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
> +	if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr, flag))
>  		return cpu_addr;
> 
>  	/* let the implementation decide on the zone to allocate from: */
> --
> 2.7.4
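
For reference, here is a minimal caller-side sketch of how allocations would behave with this change applied. It is not part of the patch: the device pointer, buffer sizes, and function name are hypothetical, and it assumes the caller's gfp flags reach __dma_alloc_from_coherent() unmodified.

#include <linux/dma-mapping.h>
#include <linux/gfp.h>
#include <linux/sizes.h>

static int example_alloc_decoder_buffers(struct device *dev)
{
	dma_addr_t frame_dma, ctrl_dma;
	void *frame_buf, *ctrl_buf;

	/*
	 * Large 4K frame buffer: the decoder always writes a full frame
	 * before anything reads it, so __GFP_ZERO is omitted and the
	 * memset() in __dma_alloc_from_coherent() is skipped.
	 */
	frame_buf = dma_alloc_coherent(dev, SZ_32M, &frame_dma, GFP_KERNEL);
	if (!frame_buf)
		return -ENOMEM;

	/*
	 * Small control buffer that may be read before it is fully
	 * written: request zeroing explicitly with __GFP_ZERO.
	 */
	ctrl_buf = dma_alloc_coherent(dev, SZ_4K, &ctrl_dma,
				      GFP_KERNEL | __GFP_ZERO);
	if (!ctrl_buf) {
		dma_free_coherent(dev, SZ_32M, frame_buf, frame_dma);
		return -ENOMEM;
	}

	/* ... hand frame_buf / ctrl_buf to the decoder ... */
	return 0;
}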




