On 2020/4/30 22:34, Jean-Philippe Brucker wrote:
When a recoverable page fault is handled by the fault workqueue, find the
associated mm and call handle_mm_fault.
Signed-off-by: Jean-Philippe Brucker <jean-philippe@xxxxxxxxxx>
---
v5->v6: select CONFIG_IOMMU_SVA
---
drivers/iommu/Kconfig | 1 +
drivers/iommu/io-pgfault.c | 79 +++++++++++++++++++++++++++++++++++++-
2 files changed, 78 insertions(+), 2 deletions(-)
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 4f33e489f0726..1e64ee6592e16 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -109,6 +109,7 @@ config IOMMU_SVA
config IOMMU_PAGE_FAULT
bool
+ select IOMMU_SVA
It would be better to move this to the previous patch.
config FSL_PAMU
bool "Freescale IOMMU support"
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 38732e97faac1..09a71dc4de20a 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -7,9 +7,12 @@
#include <linux/iommu.h>
#include <linux/list.h>
+#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
+#include "iommu-sva.h"
+
/**
* struct iopf_queue - IO Page Fault queue
* @wq: the fault workqueue
@@ -68,8 +71,57 @@ static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
static enum iommu_page_response_code
iopf_handle_single(struct iopf_fault *iopf)
{
- /* TODO */
- return -ENODEV;
+ vm_fault_t ret;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ unsigned int access_flags = 0;
+ unsigned int fault_flags = FAULT_FLAG_REMOTE;
+ struct iommu_fault_page_request *prm = &iopf->fault.prm;
+ enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
+
+ if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
+ return status;
+
+ mm = iommu_sva_find(prm->pasid);
+ if (IS_ERR_OR_NULL(mm))
+ return status;
+
+ down_read(&mm->mmap_sem);
+
+ vma = find_extend_vma(mm, prm->addr);
+ if (!vma)
+ /* Unmapped area */
+ goto out_put_mm;
+
+ if (prm->perm & IOMMU_FAULT_PERM_READ)
+ access_flags |= VM_READ;
+
+ if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
+ access_flags |= VM_WRITE;
+ fault_flags |= FAULT_FLAG_WRITE;
+ }
+
+ if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
+ access_flags |= VM_EXEC;
+ fault_flags |= FAULT_FLAG_INSTRUCTION;
+ }
+
+ if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
+ fault_flags |= FAULT_FLAG_USER;
+
+ if (access_flags & ~vma->vm_flags)
+ /* Access fault */
+ goto out_put_mm;
+
+ ret = handle_mm_fault(vma, prm->addr, fault_flags);
+ status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
+ IOMMU_PAGE_RESP_SUCCESS;
+
+out_put_mm:
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+
+ return status;
}
static void iopf_handle_group(struct work_struct *work)
@@ -104,6 +156,29 @@ static void iopf_handle_group(struct work_struct *work)
*
* Add a fault to the device workqueue, to be handled by mm.
*
+ * This module doesn't handle PCI PASID Stop Marker; IOMMU drivers must discard
+ * them before reporting faults. A PASID Stop Marker (LRW = 0b100) doesn't
+ * expect a response. It may be generated when disabling a PASID (issuing a
+ * PASID stop request) by some PCI devices.
+ *
+ * The PASID stop request is issued by the device driver before unbind(). Once
+ * it completes, no page request is generated for this PASID anymore and
+ * outstanding ones have been pushed to the IOMMU (as per PCIe 4.0r1.0 - 6.20.1
+ * and 10.4.1.2 - Managing PASID TLP Prefix Usage). Some PCI devices will wait
+ * for all outstanding page requests to come back with a response before
+ * completing the PASID stop request. Others do not wait for page responses, and
+ * instead issue this Stop Marker that tells us when the PASID can be
+ * reallocated.
+ *
+ * It is safe to discard the Stop Marker because it is an optimization.
+ * a. Page requests, which are posted requests, have been flushed to the IOMMU
+ * when the stop request completes.
+ * b. We flush all fault queues on unbind() before freeing the PASID.
+ *
+ * So even though the Stop Marker might be issued by the device *after* the stop
+ * request completes, outstanding faults will have been dealt with by the time
+ * we free the PASID.
+ *
* Return: 0 on success and <0 on error.
*/
int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
The same for the comments.
Best regards,
baolu