Re: [PATCH] PCI: endpoint: Migrate to Genalloc framework for outbound window memory allocation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, Mar 17, 2024 at 11:39:17AM +0530, Manivannan Sadhasivam wrote:
> As proposed during the last year 'PCI Endpoint Subsystem Open Items
> Discussion' of Linux Plumbers conference [1], let's migrate to Genalloc
> framework for managing the endpoint outbound window memory allocation.
> 
> PCI Endpoint subsystem is using a custom memory allocator in pci-epc-mem
> driver from the start for managing the memory required to map the host
> address space (outbound) in endpoint. Even though it works well, it
> completely defeats the purpose of the 'Genalloc framework', a general
> purpose memory allocator framework created to avoid various custom memory
> allocators in the kernel.
> 
> The migration to Genalloc framework is done is such a way that the existing
> API semantics are preserved. So that the callers of the EPC mem APIs do not
> need any modification (apart from the pcie-designware-epc driver that
> queries page size).
> 
> Internally, the EPC mem driver now uses Genalloc framework's
> 'gen_pool_first_fit_order_align' algorithm that aligns the allocated memory
> based on the requested size as like the previous allocator. And the
> page size passed during pci_epc_mem_init() API is used as the minimum order
> for the memory allocations.
> 
> During the migration, 'struct pci_epc_mem' is removed as it is seems
> redundant and the existing 'struct pci_epc_mem_window' in 'struct pci_epc'
> is now used to hold the address windows of the endpoint controller.
> 
> [1] https://lpc.events/event/17/contributions/1419/
> 
> Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@xxxxxxxxxx>
> ---
>  drivers/pci/controller/dwc/pcie-designware-ep.c |  14 +-
>  drivers/pci/endpoint/pci-epc-mem.c              | 182 +++++++++---------------
>  include/linux/pci-epc.h                         |  25 +---
>  3 files changed, 81 insertions(+), 140 deletions(-)
> 
> diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
> index 5befed2dc02b..37c612282eb6 100644
> --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> @@ -482,11 +482,11 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no,
>  		reg = ep_func->msi_cap + PCI_MSI_DATA_32;
>  		msg_data = dw_pcie_ep_readw_dbi(ep, func_no, reg);
>  	}
> -	aligned_offset = msg_addr_lower & (epc->mem->window.page_size - 1);
> +	aligned_offset = msg_addr_lower & (epc->windows[0]->page_size - 1);
>  	msg_addr = ((u64)msg_addr_upper) << 32 |
>  			(msg_addr_lower & ~aligned_offset);
>  	ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr,
> -				  epc->mem->window.page_size);
> +				  epc->windows[0]->page_size);
>  	if (ret)
>  		return ret;
>  
> @@ -550,10 +550,10 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no,
>  		return -EPERM;
>  	}
>  
> -	aligned_offset = msg_addr & (epc->mem->window.page_size - 1);
> +	aligned_offset = msg_addr & (epc->windows[0]->page_size - 1);
>  	msg_addr &= ~aligned_offset;
>  	ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr,
> -				  epc->mem->window.page_size);
> +				  epc->windows[0]->page_size);
>  	if (ret)
>  		return ret;
>  
> @@ -572,7 +572,7 @@ void dw_pcie_ep_exit(struct dw_pcie_ep *ep)
>  	dw_pcie_edma_remove(pci);
>  
>  	pci_epc_mem_free_addr(epc, ep->msi_mem_phys, ep->msi_mem,
> -			      epc->mem->window.page_size);
> +			      epc->windows[0]->page_size);
>  
>  	pci_epc_mem_exit(epc);
>  
> @@ -742,7 +742,7 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
>  	}
>  
>  	ep->msi_mem = pci_epc_mem_alloc_addr(epc, &ep->msi_mem_phys,
> -					     epc->mem->window.page_size);
> +					     epc->windows[0]->page_size);
>  	if (!ep->msi_mem) {
>  		ret = -ENOMEM;
>  		dev_err(dev, "Failed to reserve memory for MSI/MSI-X\n");
> @@ -770,7 +770,7 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
>  
>  err_free_epc_mem:
>  	pci_epc_mem_free_addr(epc, ep->msi_mem_phys, ep->msi_mem,
> -			      epc->mem->window.page_size);
> +			      epc->windows[0]->page_size);
>  
>  err_exit_epc_mem:
>  	pci_epc_mem_exit(epc);
> diff --git a/drivers/pci/endpoint/pci-epc-mem.c b/drivers/pci/endpoint/pci-epc-mem.c
> index a9c028f58da1..f9e6e1a6aeaa 100644
> --- a/drivers/pci/endpoint/pci-epc-mem.c
> +++ b/drivers/pci/endpoint/pci-epc-mem.c
> @@ -4,37 +4,18 @@
>   *
>   * Copyright (C) 2017 Texas Instruments
>   * Author: Kishon Vijay Abraham I <kishon@xxxxxx>
> + *
> + * Copyright (C) 2024 Linaro Ltd.
> + * Author: Manivannan Sadhasivam <manivannan.sadhasivam@xxxxxxxxxx>
>   */
>  
> +#include <linux/genalloc.h>
>  #include <linux/io.h>
>  #include <linux/module.h>
>  #include <linux/slab.h>
>  
>  #include <linux/pci-epc.h>
>  
> -/**
> - * pci_epc_mem_get_order() - determine the allocation order of a memory size
> - * @mem: address space of the endpoint controller
> - * @size: the size for which to get the order
> - *
> - * Reimplement get_order() for mem->page_size since the generic get_order
> - * always gets order with a constant PAGE_SIZE.
> - */
> -static int pci_epc_mem_get_order(struct pci_epc_mem *mem, size_t size)
> -{
> -	int order;
> -	unsigned int page_shift = ilog2(mem->window.page_size);
> -
> -	size--;
> -	size >>= page_shift;
> -#if BITS_PER_LONG == 32
> -	order = fls(size);
> -#else
> -	order = fls64(size);
> -#endif
> -	return order;
> -}
> -
>  /**
>   * pci_epc_multi_mem_init() - initialize the pci_epc_mem structure
>   * @epc: the EPC device that invoked pci_epc_mem_init
> @@ -48,17 +29,11 @@ int pci_epc_multi_mem_init(struct pci_epc *epc,
>  			   struct pci_epc_mem_window *windows,
>  			   unsigned int num_windows)
>  {
> -	struct pci_epc_mem *mem = NULL;
> -	unsigned long *bitmap = NULL;
> -	unsigned int page_shift;
> +	struct pci_epc_mem_window *window = NULL;
>  	size_t page_size;
> -	int bitmap_size;
> -	int pages;
>  	int ret;
>  	int i;
>  
> -	epc->num_windows = 0;
> -
>  	if (!windows || !num_windows)
>  		return -EINVAL;
>  
> @@ -70,45 +45,51 @@ int pci_epc_multi_mem_init(struct pci_epc *epc,
>  		page_size = windows[i].page_size;
>  		if (page_size < PAGE_SIZE)
>  			page_size = PAGE_SIZE;
> -		page_shift = ilog2(page_size);
> -		pages = windows[i].size >> page_shift;
> -		bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
>  
> -		mem = kzalloc(sizeof(*mem), GFP_KERNEL);
> -		if (!mem) {
> +		windows[i].pool = gen_pool_create(ilog2(page_size), -1);

I think it is not good to modify caller's memory. This funciton suppose
pass down read-only information. And set to epc->windows[i]. I think it'd
better to use epc->windows[i].pool/windows.

> +		if (!windows[i].pool) {
>  			ret = -ENOMEM;
> -			i--;
> -			goto err_mem;
> +			goto err_free_mem;
> +		}
> +
> +		gen_pool_set_algo(windows[i].pool, gen_pool_first_fit_order_align,
> +				  NULL);
> +
> +		windows[i].virt_base = ioremap(windows[i].phys_base, windows[i].size);
> +		ret = gen_pool_add_virt(windows[i].pool, (unsigned long)windows[i].virt_base,
> +					windows[i].phys_base, windows[i].size, -1);
> +		if (ret) {
> +			iounmap(windows[i].virt_base);
> +			gen_pool_destroy(epc->windows[i]->pool);

I think move all free to err path will be easy to understand.

> +			goto err_free_mem;
>  		}
>  
> -		bitmap = kzalloc(bitmap_size, GFP_KERNEL);
> -		if (!bitmap) {
> +		window = kzalloc(sizeof(*window), GFP_KERNEL);

According to below code                                                    
                                                                           
        epc->windows = kcalloc(num_windows, sizeof(*epc->windows), GFP_KERNEL);
        if (!epc->windows)                                                 
                return -ENOMEM;                                            
                                                                           
epc->windows already allocate whole num_windows * "struct pci_epc_mem_window".
I think you can direct use 'window = epc->windows + i', so needn't alloc      
additional memory for epc->windows[i].


> +		if (!window) {
>  			ret = -ENOMEM;
> -			kfree(mem);
> -			i--;
> -			goto err_mem;
> +			iounmap(windows[i].virt_base);
> +			gen_pool_destroy(epc->windows[i]->pool);
> +			goto err_free_mem;
>  		}
>  
> -		mem->window.phys_base = windows[i].phys_base;
> -		mem->window.size = windows[i].size;
> -		mem->window.page_size = page_size;
> -		mem->bitmap = bitmap;
> -		mem->pages = pages;
> -		mutex_init(&mem->lock);
> -		epc->windows[i] = mem;
> +		window->phys_base = windows[i].phys_base;
> +		window->size = windows[i].size;
> +		window->page_size = page_size;
> +		window->pool = windows[i].pool;
> +		epc->windows[i] = window;
>  	}
>  
> -	epc->mem = epc->windows[0];
>  	epc->num_windows = num_windows;
>  
>  	return 0;
>  
> -err_mem:
> -	for (; i >= 0; i--) {
> -		mem = epc->windows[i];
> -		kfree(mem->bitmap);
> -		kfree(mem);
> +err_free_mem:
> +	for (--i; i >= 0; i--) {
> +		iounmap(windows[i].virt_base);
> +		gen_pool_destroy(epc->windows[i]->pool);
> +		kfree(epc->windows[i]);
>  	}
> +
>  	kfree(epc->windows);
>  
>  	return ret;
> @@ -128,14 +109,15 @@ EXPORT_SYMBOL_GPL(pci_epc_multi_mem_init);
>  int pci_epc_mem_init(struct pci_epc *epc, phys_addr_t base,
>  		     size_t size, size_t page_size)
>  {
> -	struct pci_epc_mem_window mem_window;
> +	struct pci_epc_mem_window window;
>  
> -	mem_window.phys_base = base;
> -	mem_window.size = size;
> -	mem_window.page_size = page_size;
> +	window.phys_base = base;
> +	window.size = size;
> +	window.page_size = page_size;
>  
> -	return pci_epc_multi_mem_init(epc, &mem_window, 1);
> +	return pci_epc_multi_mem_init(epc, &window, 1);
>  }
> +

Remove extra empty line change

>  EXPORT_SYMBOL_GPL(pci_epc_mem_init);
>  
>  /**
> @@ -147,21 +129,20 @@ EXPORT_SYMBOL_GPL(pci_epc_mem_init);
>   */
>  void pci_epc_mem_exit(struct pci_epc *epc)
>  {
> -	struct pci_epc_mem *mem;
>  	int i;
>  
>  	if (!epc->num_windows)
>  		return;
>  
>  	for (i = 0; i < epc->num_windows; i++) {
> -		mem = epc->windows[i];
> -		kfree(mem->bitmap);
> -		kfree(mem);
> +		iounmap(epc->windows[i]->virt_base);
> +		gen_pool_destroy(epc->windows[i]->pool);
> +		kfree(epc->windows[i]);
>  	}
> +
>  	kfree(epc->windows);
>  
>  	epc->windows = NULL;
> -	epc->mem = NULL;
>  	epc->num_windows = 0;
>  }
>  EXPORT_SYMBOL_GPL(pci_epc_mem_exit);
> @@ -178,55 +159,42 @@ EXPORT_SYMBOL_GPL(pci_epc_mem_exit);
>  void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc,
>  				     phys_addr_t *phys_addr, size_t size)
>  {
> +	struct pci_epc_mem_window *window;
>  	void __iomem *virt_addr = NULL;
> -	struct pci_epc_mem *mem;
> -	unsigned int page_shift;
> +	struct gen_pool *genpool;
>  	size_t align_size;
> -	int pageno;
> -	int order;
>  	int i;
>  
>  	for (i = 0; i < epc->num_windows; i++) {
> -		mem = epc->windows[i];
> -		mutex_lock(&mem->lock);
> -		align_size = ALIGN(size, mem->window.page_size);
> -		order = pci_epc_mem_get_order(mem, align_size);
> -
> -		pageno = bitmap_find_free_region(mem->bitmap, mem->pages,
> -						 order);
> -		if (pageno >= 0) {
> -			page_shift = ilog2(mem->window.page_size);
> -			*phys_addr = mem->window.phys_base +
> -				((phys_addr_t)pageno << page_shift);
> -			virt_addr = ioremap(*phys_addr, align_size);
> -			if (!virt_addr) {
> -				bitmap_release_region(mem->bitmap,
> -						      pageno, order);
> -				mutex_unlock(&mem->lock);
> -				continue;
> -			}
> -			mutex_unlock(&mem->lock);
> -			return virt_addr;
> -		}
> -		mutex_unlock(&mem->lock);
> +		window = epc->windows[i];
> +		genpool = window->pool;
> +		align_size = ALIGN(size, window->page_size);
> +
> +		virt_addr = (void __iomem *)gen_pool_alloc(genpool, align_size);
> +		if (!virt_addr)
> +			continue;
> +
> +		*phys_addr = gen_pool_virt_to_phys(genpool, (unsigned long)virt_addr);
> +
> +		break;
>  	}
>  
>  	return virt_addr;
>  }
>  EXPORT_SYMBOL_GPL(pci_epc_mem_alloc_addr);
>  
> -static struct pci_epc_mem *pci_epc_get_matching_window(struct pci_epc *epc,
> +static struct pci_epc_mem_window *pci_epc_get_matching_window(struct pci_epc *epc,
>  						       phys_addr_t phys_addr)
>  {
> -	struct pci_epc_mem *mem;
> +	struct pci_epc_mem_window *window;
>  	int i;
>  
>  	for (i = 0; i < epc->num_windows; i++) {
> -		mem = epc->windows[i];
> +		window = epc->windows[i];
>  
> -		if (phys_addr >= mem->window.phys_base &&
> -		    phys_addr < (mem->window.phys_base + mem->window.size))
> -			return mem;
> +		if (phys_addr >= window->phys_base &&
> +		    phys_addr < (window->phys_base + window->size))
> +			return window;
>  	}
>  
>  	return NULL;
> @@ -244,27 +212,15 @@ static struct pci_epc_mem *pci_epc_get_matching_window(struct pci_epc *epc,
>  void pci_epc_mem_free_addr(struct pci_epc *epc, phys_addr_t phys_addr,
>  			   void __iomem *virt_addr, size_t size)
>  {
> -	struct pci_epc_mem *mem;
> -	unsigned int page_shift;
> -	size_t page_size;
> -	int pageno;
> -	int order;
> +	struct pci_epc_mem_window *window;
>  
> -	mem = pci_epc_get_matching_window(epc, phys_addr);
> -	if (!mem) {
> +	window = pci_epc_get_matching_window(epc, phys_addr);
> +	if (!window) {
>  		pr_err("failed to get matching window\n");
>  		return;
>  	}
>  
> -	page_size = mem->window.page_size;
> -	page_shift = ilog2(page_size);
> -	iounmap(virt_addr);
> -	pageno = (phys_addr - mem->window.phys_base) >> page_shift;
> -	size = ALIGN(size, page_size);
> -	order = pci_epc_mem_get_order(mem, size);
> -	mutex_lock(&mem->lock);
> -	bitmap_release_region(mem->bitmap, pageno, order);
> -	mutex_unlock(&mem->lock);
> +	gen_pool_free(window->pool, (unsigned long)virt_addr, size);
>  }
>  EXPORT_SYMBOL_GPL(pci_epc_mem_free_addr);
>  
> diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
> index 40ea18f5aa02..37ea96ed3432 100644
> --- a/include/linux/pci-epc.h
> +++ b/include/linux/pci-epc.h
> @@ -87,30 +87,19 @@ struct pci_epc_ops {
>  /**
>   * struct pci_epc_mem_window - address window of the endpoint controller
>   * @phys_base: physical base address of the PCI address window
> + * @virt_base: virtual base address of the PCI address window
> + * @pool: memory pool descriptor
>   * @size: the size of the PCI address window
>   * @page_size: size of each page
>   */
>  struct pci_epc_mem_window {
>  	phys_addr_t	phys_base;
> +	void __iomem	*virt_base;
> +	struct gen_pool *pool;
>  	size_t		size;
>  	size_t		page_size;
>  };
>  
> -/**
> - * struct pci_epc_mem - address space of the endpoint controller
> - * @window: address window of the endpoint controller
> - * @bitmap: bitmap to manage the PCI address space
> - * @pages: number of bits representing the address region
> - * @lock: mutex to protect bitmap
> - */
> -struct pci_epc_mem {
> -	struct pci_epc_mem_window window;
> -	unsigned long	*bitmap;
> -	int		pages;
> -	/* mutex to protect against concurrent access for memory allocation*/
> -	struct mutex	lock;
> -};
> -
>  /**
>   * struct pci_epc - represents the PCI EPC device
>   * @dev: PCI EPC device
> @@ -118,9 +107,6 @@ struct pci_epc_mem {
>   * @list_lock: Mutex for protecting pci_epf list
>   * @ops: function pointers for performing endpoint operations
>   * @windows: array of address space of the endpoint controller
> - * @mem: first window of the endpoint controller, which corresponds to
> - *       default address space of the endpoint controller supporting
> - *       single window.
>   * @num_windows: number of windows supported by device
>   * @max_functions: max number of functions that can be configured in this EPC
>   * @max_vfs: Array indicating the maximum number of virtual functions that can
> @@ -134,8 +120,7 @@ struct pci_epc {
>  	struct list_head		pci_epf;
>  	struct mutex			list_lock;
>  	const struct pci_epc_ops	*ops;
> -	struct pci_epc_mem		**windows;
> -	struct pci_epc_mem		*mem;
> +	struct pci_epc_mem_window	**windows;
>  	unsigned int			num_windows;
>  	u8				max_functions;
>  	u8				*max_vfs;
> 
> ---
> base-commit: 256833a66670ff28b7c1bddbd17973619e5281fd
> change-id: 20240317-pci-ep-genalloc-fa89f5e487e3
> 
> Best regards,
> -- 
> Manivannan Sadhasivam <manivannan.sadhasivam@xxxxxxxxxx>
> 




[Index of Archives]     [DMA Engine]     [Linux Coverity]     [Linux USB]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Greybus]

  Powered by Linux