On top of the tree at git.infradead.org/users/dwmw2/linux-svm.git (http:// or git://). For userspace addresses, we use the MMU notifiers and flush the IOTLB as appropriate. However, we need to do it for kernel addresses too — which basically means adding a hook to tlb_flush_kernel_range(). Does this look reasonable? I was trying to avoid it and insist on supporting addresses within the kernel's static mapping only. But it doesn't look like that's a reasonable thing to require. Signed-off-by: David Woodhouse <David.Woodhouse@xxxxxxxxx> --- arch/x86/mm/tlb.c | 2 ++ drivers/iommu/intel-svm.c | 37 ++++++++++++++++++++++++++++++++++--- include/linux/intel-iommu.h | 6 +++++- include/linux/intel-svm.h | 13 +++++-------- 4 files changed, 46 insertions(+), 12 deletions(-) diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 0a48ccf..61d9533 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -44,14 +44,11 @@ struct svm_dev_ops { /* * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only - * for access to kernel addresses. No IOTLB flushes are automatically done - * for kernel mappings; it is valid only for access to the kernel's static - * 1:1 mapping of physical memory — not to vmalloc or even module mappings. - * A future API addition may permit the use of such ranges, by means of an - * explicit IOTLB flush call (akin to the DMA API's unmap method). - * - * It is unlikely that we will ever hook into flush_tlb_kernel_range() to - * do such IOTLB flushes automatically. + * for access to kernel addresses. IOTLB flushes are performed as required + * by means of a hook from flush_tlb_kernel_range(). This flag is mutually + * exclusive with the SVM_FLAG_PRIVATE_PASID flag — there can be only one + * PASID used for kernel mode, to keep the performance implications of the + * IOTLB flush hook relatively sane. 
*/ #define SVM_FLAG_SUPERVISOR_MODE (1<<1) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 8ddb5d0..40ebe83 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -6,6 +6,7 @@ #include <linux/interrupt.h> #include <linux/module.h> #include <linux/cpu.h> +#include <linux/intel-iommu.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> @@ -266,6 +267,7 @@ static void do_kernel_range_flush(void *info) void flush_tlb_kernel_range(unsigned long start, unsigned long end) { + intel_iommu_flush_kernel_pasid(start, end); /* Balance as user space task's flush, a bit conservative */ if (end == TLB_FLUSH_ALL || diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index a584df0..f8ca3c1 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -23,6 +23,7 @@ #include <linux/pci-ats.h> #include <linux/dmar.h> #include <linux/interrupt.h> +#include <asm/tlbflush.h> static irqreturn_t prq_event_thread(int irq, void *d); @@ -264,6 +265,26 @@ static const struct mmu_notifier_ops intel_mmuops = { .invalidate_range = intel_invalidate_range, }; +void intel_iommu_flush_kernel_pasid(unsigned long start, unsigned long end) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + unsigned long pages; + + if (end == TLB_FLUSH_ALL) + pages = end; + else + pages = (end - start) >> VTD_PAGE_SHIFT; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + struct intel_svm *svm = rcu_dereference(iommu->kernel_svm); + if (svm) + intel_flush_svm_range(svm, start, pages, 0, 1); + } + rcu_read_unlock(); +} + static DEFINE_MUTEX(pasid_mutex); int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) @@ -286,6 +307,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ pasid_max = 1 << 20; if ((flags & SVM_FLAG_SUPERVISOR_MODE)) { + if (flags & SVM_FLAG_PRIVATE_PASID) + return -EINVAL; if (!ecap_srs(iommu->ecap)) return -EINVAL; } else if (pasid) { @@ -294,7 +317,9 @@ int 
intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } mutex_lock(&pasid_mutex); - if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) { + if (flags & SVM_FLAG_SUPERVISOR_MODE) + svm = iommu->kernel_svm; + else if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) { int i; idr_for_each_entry(&iommu->pasid_idr, svm, i) { @@ -378,8 +403,10 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1; mm = NULL; - } else + } else { iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11); + rcu_assign_pointer(iommu->kernel_svm, svm); + } wmb(); } list_add_rcu(&sdev->list, &svm->devs); @@ -432,8 +459,12 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) mmu_notifier_unregister(&svm->notifier, svm->mm); idr_remove(&svm->iommu->pasid_idr, svm->pasid); - if (svm->mm) + if (svm->mm) { mmput(svm->mm); + } else { + rcu_assign_pointer(iommu->kernel_svm, NULL); + synchronize_rcu(); + } /* We mandate that no page faults may be outstanding * for the PASID when intel_svm_unbind_mm() is called. * If that is not obeyed, subtle errors will happen. 
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 821273c..169bc84 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -391,6 +391,7 @@ enum { struct pasid_entry; struct pasid_state_entry; struct page_req_dsc; +struct intel_svm; struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ @@ -426,6 +427,7 @@ struct intel_iommu { struct page_req_dsc *prq; unsigned char prq_name[16]; /* Name for PRQ interrupt */ struct idr pasid_idr; + struct intel_svm __rcu *kernel_svm; #endif struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ @@ -496,8 +498,10 @@ struct intel_svm { extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev); extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev); +extern void intel_iommu_flush_kernel_pasid(unsigned long start, unsigned long end); +#else +#define intel_iommu_flush_kernel_pasid(start, end) do { ; } while(0) #endif - extern const struct attribute_group *intel_iommu_groups[]; #endif -- David Woodhouse Open Source Technology Centre David.Woodhouse@xxxxxxxxx Intel Corporation
Attachment:
smime.p7s
Description: S/MIME cryptographic signature