On Fri, Mar 15, 2024 at 09:46:06AM +0800, Baolu Lu wrote: > On 3/9/24 2:03 AM, Jason Gunthorpe wrote: > > On Mon, Jan 22, 2024 at 03:38:59PM +0800, Lu Baolu wrote: > > > --- /dev/null > > > +++ b/drivers/iommu/iommufd/fault.c > > > @@ -0,0 +1,255 @@ > > > +// SPDX-License-Identifier: GPL-2.0-only > > > +/* Copyright (C) 2024 Intel Corporation > > > + */ > > > +#define pr_fmt(fmt) "iommufd: " fmt > > > + > > > +#include <linux/file.h> > > > +#include <linux/fs.h> > > > +#include <linux/module.h> > > > +#include <linux/mutex.h> > > > +#include <linux/iommufd.h> > > > +#include <linux/poll.h> > > > +#include <linux/anon_inodes.h> > > > +#include <uapi/linux/iommufd.h> > > > + > > > +#include "iommufd_private.h" > > > + > > > +static int device_add_fault(struct iopf_group *group) > > > +{ > > > + struct iommufd_device *idev = group->cookie->private; > > > + void *curr; > > > + > > > + curr = xa_cmpxchg(&idev->faults, group->last_fault.fault.prm.grpid, > > > + NULL, group, GFP_KERNEL); > > > + > > > + return curr ? xa_err(curr) : 0; > > > +} > > > + > > > +static void device_remove_fault(struct iopf_group *group) > > > +{ > > > + struct iommufd_device *idev = group->cookie->private; > > > + > > > + xa_store(&idev->faults, group->last_fault.fault.prm.grpid, > > > + NULL, GFP_KERNEL); > > > > xa_erase ? > > Yes. Sure. > > > Is grpid OK to use this way? Doesn't it come from the originating > > device? > > The group ID is generated by the hardware. Here, we use it as an index > in the fault array to ensure it can be quickly retrieved in the page > fault response path. I'm nervous about this, we are trusting HW outside the kernel to provide unique grp id's which are integral to how the kernel operates.. > > > +static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, > > > + size_t count, loff_t *ppos) > > > +{ > > > + size_t fault_size = sizeof(struct iommu_hwpt_pgfault); > > > + struct iommufd_fault *fault = filep->private_data; > > > + struct iommu_hwpt_pgfault data; > > > + struct iommufd_device *idev; > > > + struct iopf_group *group; > > > + struct iopf_fault *iopf; > > > + size_t done = 0; > > > + int rc; > > > + > > > + if (*ppos || count % fault_size) > > > + return -ESPIPE; > > > + > > > + mutex_lock(&fault->mutex); > > > + while (!list_empty(&fault->deliver) && count > done) { > > > + group = list_first_entry(&fault->deliver, > > > + struct iopf_group, node); > > > + > > > + if (list_count_nodes(&group->faults) * fault_size > count - done) > > > + break; > > > + > > > + idev = (struct iommufd_device *)group->cookie->private; > > > + list_for_each_entry(iopf, &group->faults, list) { > > > + iommufd_compose_fault_message(&iopf->fault, &data, idev); > > > + rc = copy_to_user(buf + done, &data, fault_size); > > > + if (rc) > > > + goto err_unlock; > > > + done += fault_size; > > > + } > > > + > > > + rc = device_add_fault(group); > > > > See I wonder if this should be some xa_alloc or something instead of > > trying to use the grpid? > > So this magic number will be passed to user space in the fault message. > And the user will then include this number in its response message. The > response message is valid only when the magic number matches. Do I get > you correctly? Yes, then it is simple xa_alloc() and xa_load() without any other searching and we don't have to rely on the grpid to be correctly formed by the PCI device. But I don't know about performance xa_alloc() is pretty fast but trusting the grpid would be faster.. IMHO from a uapi perspective we should have a definate "cookie" that gets echo'd back. If the kernel uses xa_alloc or grpid to build that cookie it doesn't matter to the uAPI. Jason