Hi Baolu, On 2021/3/10 10:09, Lu Baolu wrote: > Hi Shenming, > > On 3/9/21 2:22 PM, Shenming Lu wrote: >> This patch follows the discussion here: >> >> https://lore.kernel.org/linux-acpi/YAaxjmJW+ZMvrhac@myrica/ >> >> In order to support more scenarios of using IOPF (mainly consider >> the nested extension), besides keeping IOMMU_DEV_FEAT_IOPF as a >> general capability for whether delivering faults through the IOMMU, >> we extend iommu_register_fault_handler() with flags and introduce >> IOPF_REPORT_FLAT and IOPF_REPORT_NESTED to describe the page fault >> reporting capability under a specific configuration. >> IOPF_REPORT_NESTED needs additional info to indicate which level/stage >> is concerned since the fault client may be interested in only one >> level. >> >> Signed-off-by: Shenming Lu <lushenming@xxxxxxxxxx> >> --- >> .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 3 +- >> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 11 ++-- >> drivers/iommu/io-pgfault.c | 4 -- >> drivers/iommu/iommu.c | 56 ++++++++++++++++++- >> include/linux/iommu.h | 21 ++++++- >> include/uapi/linux/iommu.h | 3 + >> 6 files changed, 85 insertions(+), 13 deletions(-) >> >> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c >> index ee66d1f4cb81..5de9432349d4 100644 >> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c >> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c >> @@ -482,7 +482,8 @@ static int arm_smmu_master_sva_enable_iopf(struct arm_smmu_master *master) >> if (ret) >> return ret; >> - ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); >> + ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, >> + IOPF_REPORT_FLAT, dev); >> if (ret) { >> iopf_queue_remove_device(master->smmu->evtq.iopf, dev); >> return ret; >> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c >> index 363744df8d51..f40529d0075d 100644 >> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c >> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c >> @@ -1447,10 +1447,6 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) >> return -EOPNOTSUPP; >> } >> - /* Stage-2 is always pinned at the moment */ >> - if (evt[1] & EVTQ_1_S2) >> - return -EFAULT; >> - >> if (evt[1] & EVTQ_1_RnW) >> perm |= IOMMU_FAULT_PERM_READ; >> else >> @@ -1468,13 +1464,18 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) >> .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE, >> .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]), >> .perm = perm, >> - .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]), >> }; >> if (ssid_valid) { >> flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; >> flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]); >> } >> + >> + if (evt[1] & EVTQ_1_S2) { >> + flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_L2; >> + flt->prm.addr = FIELD_GET(EVTQ_3_IPA, evt[3]); >> + } else >> + flt->prm.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]); >> } else { >> flt->type = IOMMU_FAULT_DMA_UNRECOV; >> flt->event = (struct iommu_fault_unrecoverable) { >> diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c >> index 1df8c1dcae77..abf16e06bcf5 100644 >> --- a/drivers/iommu/io-pgfault.c >> +++ b/drivers/iommu/io-pgfault.c >> @@ -195,10 +195,6 @@ int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) >> lockdep_assert_held(¶m->lock); >> - if (fault->type != IOMMU_FAULT_PAGE_REQ) >> - /* Not a recoverable page fault */ >> - return -EOPNOTSUPP; >> - > > Any reasons why do you want to remove this check? My thought was to make the reporting cap more detailed: IOPF_REPORT_ is only for recoverable page faults (IOMMU_FAULT_PAGE_REQ), and we may add UNRECOV_FAULT_REPORT_ later for unrecoverable faults (IOMMU_FAULT_DMA_UNRECOV)... > >> /* >> * As long as we're holding param->lock, the queue can't be unlinked >> * from the device and therefore cannot disappear. >> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c >> index d0b0a15dba84..cb1d93b00f7d 100644 >> --- a/drivers/iommu/iommu.c >> +++ b/drivers/iommu/iommu.c >> @@ -1056,6 +1056,40 @@ int iommu_group_unregister_notifier(struct iommu_group *group, >> } >> EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier); >> +/* >> + * iommu_update_device_fault_handler - Update the device fault handler via flags >> + * @dev: the device >> + * @mask: bits(not set) to clear >> + * @set: bits to set >> + * >> + * Update the device fault handler installed by >> + * iommu_register_device_fault_handler(). >> + * >> + * Return 0 on success, or an error. >> + */ >> +int iommu_update_device_fault_handler(struct device *dev, u32 mask, u32 set) >> +{ >> + struct dev_iommu *param = dev->iommu; >> + int ret = 0; >> + >> + if (!param) >> + return -EINVAL; >> + >> + mutex_lock(¶m->lock); >> + >> + if (param->fault_param) { >> + ret = -EINVAL; >> + goto out_unlock; >> + } >> + >> + param->fault_param->flags = (param->fault_param->flags & mask) | set; >> + >> +out_unlock: >> + mutex_unlock(¶m->lock); >> + return ret; >> +} >> +EXPORT_SYMBOL_GPL(iommu_update_device_fault_handler); > > When and why will this API be used? Why not registering the fault > handling capabilities of a device driver only once during probe()? In VFIO, stage 2 IOPF might be enabled via an ioctl from the userspace (in this series), while stage 1 IOPF is enabled from vfio_pci_enable() [1] for nested mode, they are configured separately, and currently each device can only have one iommu dev fault handler. So I choose to add a update interface for the second one. [1] https://patchwork.kernel.org/project/kvm/patch/20210223210625.604517-6-eric.auger@xxxxxxxxxx/ > >> + >> /** >> * iommu_register_device_fault_handler() - Register a device fault handler >> * @dev: the device >> @@ -1076,11 +1110,14 @@ EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier); >> */ >> int iommu_register_device_fault_handler(struct device *dev, >> iommu_dev_fault_handler_t handler, >> - void *data) >> + u32 flags, void *data) >> { >> struct dev_iommu *param = dev->iommu; >> int ret = 0; >> + if (flags & IOPF_REPORT_FLAT && flags & IOPF_REPORT_NESTED) >> + return -EINVAL; >> + >> if (!param) >> return -EINVAL; >> @@ -1099,6 +1136,7 @@ int iommu_register_device_fault_handler(struct device *dev, >> goto done_unlock; >> } >> param->fault_param->handler = handler; >> + param->fault_param->flags = flags; >> param->fault_param->data = data; >> mutex_init(¶m->fault_param->lock); >> INIT_LIST_HEAD(¶m->fault_param->faults); >> @@ -1177,6 +1215,22 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) >> goto done_unlock; >> } >> + /* The unrecoverable fault reporting is not supported at the moment. */ >> + if (evt->fault.type != IOMMU_FAULT_PAGE_REQ) >> + return -EOPNOTSUPP; > > Any reasons why do you want to disable reporting an unrecoverable fault? When I add UNRECOV_FAULT_REPORT_ (mentioned above), I will enable the unrecoverable fault reporting if the fault client concerns it. Sorry for this. :-) > >> + >> + if (evt->fault.type == IOMMU_FAULT_PAGE_REQ) { >> + if (fparam->flags & IOPF_REPORT_NESTED) { >> + if (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_L2 && >> + !(fparam->flags & IOPF_REPORT_NESTED_L2_CONCERNED)) >> + return -EOPNOTSUPP; >> + if (!(evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_L2) && >> + !(fparam->flags & IOPF_REPORT_NESTED_L1_CONCERNED)) >> + return -EOPNOTSUPP; >> + } else if (!(fparam->flags & IOPF_REPORT_FLAT)) >> + return -EOPNOTSUPP; >> + } >> + >> if (evt->fault.type == IOMMU_FAULT_PAGE_REQ && >> (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { >> evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event), >> diff --git a/include/linux/iommu.h b/include/linux/iommu.h >> index 86d688c4418f..f03d761e8310 100644 >> --- a/include/linux/iommu.h >> +++ b/include/linux/iommu.h >> @@ -352,12 +352,21 @@ struct iommu_fault_event { >> /** >> * struct iommu_fault_param - per-device IOMMU fault data >> * @handler: Callback function to handle IOMMU faults at device level >> + * @flags: Indicates whether the fault reporting is available under a >> + * specific configuration (1st/2nd-level-only(FLAT), or nested). >> + * IOPF_REPORT_NESTED needs to additionally know which level/stage >> + * is concerned. > > If IOPF_REPORT_NESTED only is not valid why do you want to define it? Yeah, it seems that IOPF_REPORT_NESTED is unnecessary, IOPF_REPORT_NESTED_L1 + IOPF_REPORT_NESTED_L2 is enough... Thanks for your comments! Shenming > >> * @data: handler private data >> * @faults: holds the pending faults which needs response >> * @lock: protect pending faults list >> */ >> struct iommu_fault_param { >> iommu_dev_fault_handler_t handler; >> +#define IOPF_REPORT_FLAT (1 << 0) >> +#define IOPF_REPORT_NESTED (1 << 1) >> +#define IOPF_REPORT_NESTED_L1_CONCERNED (1 << 2) >> +#define IOPF_REPORT_NESTED_L2_CONCERNED (1 << 3) >> + u32 flags; >> void *data; >> struct list_head faults; >> struct mutex lock; >> @@ -509,9 +518,11 @@ extern int iommu_group_register_notifier(struct iommu_group *group, >> struct notifier_block *nb); >> extern int iommu_group_unregister_notifier(struct iommu_group *group, >> struct notifier_block *nb); >> +extern int iommu_update_device_fault_handler(struct device *dev, >> + u32 mask, u32 set); >> extern int iommu_register_device_fault_handler(struct device *dev, >> iommu_dev_fault_handler_t handler, >> - void *data); >> + u32 flags, void *data); >> extern int iommu_unregister_device_fault_handler(struct device *dev); >> @@ -873,10 +884,16 @@ static inline int iommu_group_unregister_notifier(struct iommu_group *group, >> return 0; >> } >> +static inline int iommu_update_device_fault_handler(struct device *dev, >> + u32 mask, u32 set) >> +{ >> + return -ENODEV; >> +} >> + >> static inline >> int iommu_register_device_fault_handler(struct device *dev, >> iommu_dev_fault_handler_t handler, >> - void *data) >> + u32 flags, void *data) >> { >> return -ENODEV; >> } >> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h >> index e1d9e75f2c94..0ce0dfb7713e 100644 >> --- a/include/uapi/linux/iommu.h >> +++ b/include/uapi/linux/iommu.h >> @@ -85,6 +85,8 @@ struct iommu_fault_unrecoverable { >> * When IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID is set, the page response >> * must have the same PASID value as the page request. When it is clear, >> * the page response should not have a PASID. >> + * If IOMMU_FAULT_PAGE_REQUEST_L2 is set, the fault occurred at the >> + * second level/stage, otherwise, occurred at the first level. >> * @pasid: Process Address Space ID >> * @grpid: Page Request Group Index >> * @perm: requested page permissions (IOMMU_FAULT_PERM_* values) >> @@ -96,6 +98,7 @@ struct iommu_fault_page_request { >> #define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1) >> #define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2) >> #define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 3) >> +#define IOMMU_FAULT_PAGE_REQUEST_L2 (1 << 4) >> __u32 flags; >> __u32 pasid; >> __u32 grpid; >> > > Best regards, > baolu > .