Re: [PATCH v2 2/3] media: ipu6: optimize the IPU6 MMU mapping flow

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Sakari,

On 11/1/24 9:16 PM, Sakari Ailus wrote:
> Hi Bingbu,
> 
> Thanks for the update.
> 
> On Fri, Oct 25, 2024 at 01:52:31PM +0800, Bingbu Cao wrote:
>> ipu6_mmu_map() operated on a per-page basis, which leads to frequent
>> spin_lock/unlock() and clflush_cache_range() calls for each page;
>> this causes inefficiencies, especially when handling dma-bufs
>> with a large number of pages. However, the pages are likely contiguous,
>> as laid out by the IOMMU DMA driver, so the IPU MMU driver can map the
>> contiguous pages into fewer entries in the l1 table.
>>
>> This change enhances ipu6_mmu_map() by batching the processing of
>> multiple contiguous pages. It significantly reduces the calls to
>> spin_lock/unlock() and clflush_cache_range() and improves the performance.
>>
>> Signed-off-by: Bingbu Cao <bingbu.cao@xxxxxxxxx>
>> Signed-off-by: Jianhui Dai <jianhui.j.dai@xxxxxxxxx>
>> ---
>>  drivers/media/pci/intel/ipu6/ipu6-mmu.c | 144 +++++++++++++++-----------------
>>  1 file changed, 69 insertions(+), 75 deletions(-)
>>
>> diff --git a/drivers/media/pci/intel/ipu6/ipu6-mmu.c b/drivers/media/pci/intel/ipu6/ipu6-mmu.c
>> index 9ea6789bca5e..e957ccb4691d 100644
>> --- a/drivers/media/pci/intel/ipu6/ipu6-mmu.c
>> +++ b/drivers/media/pci/intel/ipu6/ipu6-mmu.c
>> @@ -295,72 +295,90 @@ static size_t l2_unmap(struct ipu6_mmu_info *mmu_info, unsigned long iova,
>>  static int l2_map(struct ipu6_mmu_info *mmu_info, unsigned long iova,
>>  		  phys_addr_t paddr, size_t size)
>>  {
>> -	u32 l1_idx = iova >> ISP_L1PT_SHIFT;
>> -	u32 iova_start = iova;
>> +	struct device *dev = mmu_info->dev;
>> +	unsigned int l2_entries;
>>  	u32 *l2_pt, *l2_virt;
>>  	unsigned int l2_idx;
>>  	unsigned long flags;
>> +	size_t mapped = 0;
>>  	dma_addr_t dma;
>>  	u32 l1_entry;
>> -
>> -	dev_dbg(mmu_info->dev,
>> -		"mapping l2 page table for l1 index %u (iova %8.8x)\n",
>> -		l1_idx, (u32)iova);
>> +	u32 l1_idx;
>> +	int err = 0;
>>  
>>  	spin_lock_irqsave(&mmu_info->lock, flags);
>> -	l1_entry = mmu_info->l1_pt[l1_idx];
>> -	if (l1_entry == mmu_info->dummy_l2_pteval) {
>> -		l2_virt = mmu_info->l2_pts[l1_idx];
>> -		if (likely(!l2_virt)) {
>> -			l2_virt = alloc_l2_pt(mmu_info);
>> -			if (!l2_virt) {
>> -				spin_unlock_irqrestore(&mmu_info->lock, flags);
>> -				return -ENOMEM;
>> -			}
>> -		}
>> -
>> -		dma = map_single(mmu_info, l2_virt);
>> -		if (!dma) {
>> -			dev_err(mmu_info->dev, "Failed to map l2pt page\n");
>> -			free_page((unsigned long)l2_virt);
>> -			spin_unlock_irqrestore(&mmu_info->lock, flags);
>> -			return -EINVAL;
>> -		}
>> -
>> -		l1_entry = dma >> ISP_PADDR_SHIFT;
>> -
>> -		dev_dbg(mmu_info->dev, "page for l1_idx %u %p allocated\n",
>> -			l1_idx, l2_virt);
>> -		mmu_info->l1_pt[l1_idx] = l1_entry;
>> -		mmu_info->l2_pts[l1_idx] = l2_virt;
>> -		clflush_cache_range((void *)&mmu_info->l1_pt[l1_idx],
>> -				    sizeof(mmu_info->l1_pt[l1_idx]));
>> -	}
>> -
>> -	l2_pt = mmu_info->l2_pts[l1_idx];
>> -
>> -	dev_dbg(mmu_info->dev, "l2_pt at %p with dma 0x%x\n", l2_pt, l1_entry);
>>  
>>  	paddr = ALIGN(paddr, ISP_PAGE_SIZE);
>> +	for (l1_idx = iova >> ISP_L1PT_SHIFT;
>> +	     size > 0 && l1_idx < ISP_L1PT_PTES; l1_idx++) {
>> +		dev_dbg(dev,
>> +			"mapping l2 page table for l1 index %u (iova %8.8x)\n",
>> +			l1_idx, (u32)iova);
>>  
>> -	l2_idx = (iova_start & ISP_L2PT_MASK) >> ISP_L2PT_SHIFT;
>> +		l1_entry = mmu_info->l1_pt[l1_idx];
>> +		if (l1_entry == mmu_info->dummy_l2_pteval) {
>> +			l2_virt = mmu_info->l2_pts[l1_idx];
>> +			if (likely(!l2_virt)) {
> 
> likely() should only be used if it's really, really uncommon on a hot path.
> I don't think that's the case here.
> 
> Can it even happen l2_virt is NULL here? l1_entry has been assigned while
> the l2 PT page has been mapped so both are either dummy / NULL or valid
> values. The l2 pages aren't unmapped in unmap, but only in
> ipu6_mmu_destroy(). The same goes for map_single(): if l2_virt was
> already valid, the page has been mapped, too.
> 
> This seems like a pre-existing problem. If there is no actual bug here but
> just inconsistent implementation (so it would seem), I think it's fine to
> address this on top.
> 
> Can you confirm this?

I would like to address this in another patch instead of this series.
What do you think?

> 
>> +				l2_virt = alloc_l2_pt(mmu_info);
>> +				if (!l2_virt) {
>> +					err = -ENOMEM;
>> +					goto error;
>> +				}
>> +			}
>>  
>> -	dev_dbg(mmu_info->dev, "l2_idx %u, phys 0x%8.8x\n", l2_idx,
>> -		l2_pt[l2_idx]);
>> -	if (l2_pt[l2_idx] != mmu_info->dummy_page_pteval) {
>> -		spin_unlock_irqrestore(&mmu_info->lock, flags);
>> -		return -EINVAL;
>> +			dma = map_single(mmu_info, l2_virt);
>> +			if (!dma) {
>> +				dev_err(dev, "Failed to map l2pt page\n");
>> +				free_page((unsigned long)l2_virt);
>> +				err = -EINVAL;
>> +				goto error;
>> +			}
>> +
>> +			l1_entry = dma >> ISP_PADDR_SHIFT;
>> +
>> +			dev_dbg(dev, "page for l1_idx %u %p allocated\n",
>> +				l1_idx, l2_virt);
>> +			mmu_info->l1_pt[l1_idx] = l1_entry;
>> +			mmu_info->l2_pts[l1_idx] = l2_virt;
>> +
>> +			clflush_cache_range(&mmu_info->l1_pt[l1_idx],
>> +					    sizeof(mmu_info->l1_pt[l1_idx]));
>> +		}
>> +
>> +		l2_pt = mmu_info->l2_pts[l1_idx];
>> +		l2_entries = 0;
>> +
>> +		for (l2_idx = (iova & ISP_L2PT_MASK) >> ISP_L2PT_SHIFT;
>> +		     size > 0 && l2_idx < ISP_L2PT_PTES; l2_idx++) {
>> +			l2_pt[l2_idx] = paddr >> ISP_PADDR_SHIFT;
>> +
>> +			dev_dbg(dev, "l2 index %u mapped as 0x%8.8x\n", l2_idx,
>> +				l2_pt[l2_idx]);
>> +
>> +			iova += ISP_PAGE_SIZE;
>> +			paddr += ISP_PAGE_SIZE;
>> +			mapped += ISP_PAGE_SIZE;
>> +			size -= ISP_PAGE_SIZE;
>> +
>> +			l2_entries++;
>> +		}
>> +
>> +		WARN_ON_ONCE(!l2_entries);
>> +		clflush_cache_range(&l2_pt[l2_idx - l2_entries],
>> +				    sizeof(l2_pt[0]) * l2_entries);
>>  	}
>>  
>> -	l2_pt[l2_idx] = paddr >> ISP_PADDR_SHIFT;
>> -
>> -	clflush_cache_range((void *)&l2_pt[l2_idx], sizeof(l2_pt[l2_idx]));
>>  	spin_unlock_irqrestore(&mmu_info->lock, flags);
>>  
>> -	dev_dbg(mmu_info->dev, "l2 index %u mapped as 0x%8.8x\n", l2_idx,
>> -		l2_pt[l2_idx]);
>> -
>>  	return 0;
>> +
>> +error:
>> +	spin_unlock_irqrestore(&mmu_info->lock, flags);
>> +	/* unroll mapping in case something went wrong */
>> +	if (mapped)
>> +		l2_unmap(mmu_info, iova - mapped, paddr - mapped, mapped);
>> +
>> +	return err;
>>  }
>>  
>>  static int __ipu6_mmu_map(struct ipu6_mmu_info *mmu_info, unsigned long iova,
>> @@ -692,10 +710,7 @@ size_t ipu6_mmu_unmap(struct ipu6_mmu_info *mmu_info, unsigned long iova,
>>  int ipu6_mmu_map(struct ipu6_mmu_info *mmu_info, unsigned long iova,
>>  		 phys_addr_t paddr, size_t size)
>>  {
>> -	unsigned long orig_iova = iova;
>>  	unsigned int min_pagesz;
>> -	size_t orig_size = size;
>> -	int ret = 0;
>>  
>>  	if (mmu_info->pgsize_bitmap == 0UL)
>>  		return -ENODEV;
>> @@ -718,28 +733,7 @@ int ipu6_mmu_map(struct ipu6_mmu_info *mmu_info, unsigned long iova,
>>  	dev_dbg(mmu_info->dev, "map: iova 0x%lx pa %pa size 0x%zx\n",
>>  		iova, &paddr, size);
>>  
>> -	while (size) {
>> -		size_t pgsize = ipu6_mmu_pgsize(mmu_info->pgsize_bitmap,
>> -						iova | paddr, size);
>> -
>> -		dev_dbg(mmu_info->dev,
>> -			"mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
>> -			iova, &paddr, pgsize);
>> -
>> -		ret = __ipu6_mmu_map(mmu_info, iova, paddr, pgsize);
>> -		if (ret)
>> -			break;
>> -
>> -		iova += pgsize;
>> -		paddr += pgsize;
>> -		size -= pgsize;
>> -	}
>> -
>> -	/* unroll mapping in case something went wrong */
>> -	if (ret)
>> -		ipu6_mmu_unmap(mmu_info, orig_iova, orig_size - size);
>> -
>> -	return ret;
>> +	return __ipu6_mmu_map(mmu_info, iova, paddr, size);
>>  }
>>  
>>  static void ipu6_mmu_destroy(struct ipu6_mmu *mmu)
> 

-- 
Best regards,
Bingbu Cao




[Index of Archives]     [Linux Input]     [Video for Linux]     [Gstreamer Embedded]     [Mplayer Users]     [Linux USB Devel]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [Yosemite Backpacking]

  Powered by Linux