> From: Lu Baolu [mailto:baolu.lu@xxxxxxxxxxxxxxx]
> Sent: Monday, September 23, 2019 8:25 PM
>
> If a dmar domain has DOMAIN_FLAG_FIRST_LEVEL_TRANS bit set
> in its flags, IOMMU will use the first level page table for
> translation. Hence, we need to map or unmap addresses in the
> first level page table.
>
> Cc: Ashok Raj <ashok.raj@xxxxxxxxx>
> Cc: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx>
> Cc: Kevin Tian <kevin.tian@xxxxxxxxx>
> Cc: Liu Yi L <yi.l.liu@xxxxxxxxx>
> Cc: Yi Sun <yi.y.sun@xxxxxxxxxxxxxxx>
> Signed-off-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
> ---
>  drivers/iommu/intel-iommu.c | 94 ++++++++++++++++++++++++++++++++-----
>  1 file changed, 82 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 9cfe8098d993..103480016010 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -168,6 +168,11 @@ static inline unsigned long virt_to_dma_pfn(void *p)
>  	return page_to_dma_pfn(virt_to_page(p));
>  }
>
> +static inline unsigned long dma_pfn_to_addr(unsigned long pfn)
> +{
> +	return pfn << VTD_PAGE_SHIFT;
> +}
> +
>  /* global iommu list, set NULL for ignored DMAR units */
>  static struct intel_iommu **g_iommus;
>
> @@ -307,6 +312,9 @@ static int hw_pass_through = 1;
>   */
>  #define DOMAIN_FLAG_LOSE_CHILDREN		BIT(1)
>
> +/* Domain uses first level translation for DMA remapping. */
> +#define DOMAIN_FLAG_FIRST_LEVEL_TRANS	BIT(2)
> +
>  #define for_each_domain_iommu(idx, domain)		\
>  	for (idx = 0; idx < g_num_of_iommus; idx++)	\
>  		if (domain->iommu_refcnt[idx])
> @@ -552,6 +560,11 @@ static inline int domain_type_is_si(struct dmar_domain *domain)
>  	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
>  }
>
> +static inline int domain_type_is_flt(struct dmar_domain *domain)
> +{
> +	return domain->flags & DOMAIN_FLAG_FIRST_LEVEL_TRANS;
> +}
> +
>  static inline int domain_pfn_supported(struct dmar_domain *domain,
>  				       unsigned long pfn)
>  {
> @@ -1147,8 +1160,15 @@ static struct page *domain_unmap(struct dmar_domain *domain,
>  	BUG_ON(start_pfn > last_pfn);
>
>  	/* we don't need lock here; nobody else touches the iova range */
> -	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
> -				       domain->pgd, 0, start_pfn, last_pfn, NULL);
> +	if (domain_type_is_flt(domain))
> +		freelist = intel_mmunmap_range(domain,
> +					       dma_pfn_to_addr(start_pfn),
> +					       dma_pfn_to_addr(last_pfn + 1));
> +	else
> +		freelist = dma_pte_clear_level(domain,
> +					       agaw_to_level(domain->agaw),
> +					       domain->pgd, 0, start_pfn,
> +					       last_pfn, NULL);

What about providing a unified interface at the caller side, then having
the level differentiated within the interface?
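As a rough sketch of what that could look like (the helper name
domain_clear_range() is made up here, not part of the patch; it just
hoists the existing branch out of domain_unmap(), mirroring how
__domain_mapping() below dispatches to __domain_mapping_mm() and
__domain_mapping_dma()):

static struct page *domain_clear_range(struct dmar_domain *domain,
				       unsigned long start_pfn,
				       unsigned long last_pfn)
{
	/* Hide the first-level vs. second-level choice from the caller. */
	if (domain_type_is_flt(domain))
		return intel_mmunmap_range(domain,
					   dma_pfn_to_addr(start_pfn),
					   dma_pfn_to_addr(last_pfn + 1));

	return dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
				   domain->pgd, 0, start_pfn, last_pfn,
				   NULL);
}

Then domain_unmap() keeps a single call site:

	freelist = domain_clear_range(domain, start_pfn, last_pfn);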
>
>  	/* free pgd */
>  	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
> @@ -2213,9 +2233,10 @@ static inline int hardware_largepage_caps(struct dmar_domain *domain,
>  	return level;
>  }
>
> -static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
> -			    struct scatterlist *sg, unsigned long phys_pfn,
> -			    unsigned long nr_pages, int prot)
> +static int
> +__domain_mapping_dma(struct dmar_domain *domain, unsigned long iov_pfn,
> +		     struct scatterlist *sg, unsigned long phys_pfn,
> +		     unsigned long nr_pages, int prot)
>  {
>  	struct dma_pte *first_pte = NULL, *pte = NULL;
>  	phys_addr_t uninitialized_var(pteval);
> @@ -2223,13 +2244,6 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
>  	unsigned int largepage_lvl = 0;
>  	unsigned long lvl_pages = 0;
>
> -	BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
> -
> -	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
> -		return -EINVAL;
> -
> -	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
> -
>  	if (!sg) {
>  		sg_res = nr_pages;
>  		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
> @@ -2328,6 +2342,62 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
>  	return 0;
>  }
>
> +static int
> +__domain_mapping_mm(struct dmar_domain *domain, unsigned long iov_pfn,
> +		    struct scatterlist *sg, unsigned long phys_pfn,
> +		    unsigned long nr_pages, int prot)
> +{
> +	int ret = 0;
> +
> +	if (!sg)
> +		return intel_mmmap_range(domain, dma_pfn_to_addr(iov_pfn),
> +					 dma_pfn_to_addr(iov_pfn + nr_pages),
> +					 dma_pfn_to_addr(phys_pfn), prot);
> +
> +	while (nr_pages > 0) {
> +		unsigned long sg_pages, phys;
> +		unsigned long pgoff = sg->offset & ~PAGE_MASK;
> +
> +		sg_pages = aligned_nrpages(sg->offset, sg->length);
> +		phys = sg_phys(sg) - pgoff;
> +
> +		ret = intel_mmmap_range(domain, dma_pfn_to_addr(iov_pfn),
> +					dma_pfn_to_addr(iov_pfn + sg_pages),
> +					phys, prot);
> +		if (ret)
> +			break;
> +
> +		sg->dma_address = ((dma_addr_t)dma_pfn_to_addr(iov_pfn)) + pgoff;
> +		sg->dma_length = sg->length;
> +
> +		nr_pages -= sg_pages;
> +		iov_pfn += sg_pages;
> +		sg = sg_next(sg);
> +	}
> +
> +	return ret;
> +}
> +
> +static int
> +__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
> +		 struct scatterlist *sg, unsigned long phys_pfn,
> +		 unsigned long nr_pages, int prot)
> +{
> +	BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
> +
> +	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
> +		return -EINVAL;
> +
> +	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
> +
> +	if (domain_type_is_flt(domain))
> +		return __domain_mapping_mm(domain, iov_pfn, sg,
> +					   phys_pfn, nr_pages, prot);
> +	else
> +		return __domain_mapping_dma(domain, iov_pfn, sg,
> +					    phys_pfn, nr_pages, prot);
> +}
> +
>  static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
>  			  struct scatterlist *sg, unsigned long phys_pfn,
>  			  unsigned long nr_pages, int prot)
> --
> 2.17.1