On Tue, 2020-03-03 at 10:28 +0100, Frederic Barrat wrote: > > Le 21/02/2020 à 04:27, Alastair D'Silva a écrit : > > From: Alastair D'Silva <alastair@xxxxxxxxxxx> > > > > This patch introduces a character device (/dev/ocxl-scmX) which > > further > > patches will use to interact with userspace. > > > > Signed-off-by: Alastair D'Silva <alastair@xxxxxxxxxxx> > > --- > > arch/powerpc/platforms/powernv/pmem/ocxl.c | 116 > > +++++++++++++++++- > > .../platforms/powernv/pmem/ocxl_internal.h | 2 + > > 2 files changed, 116 insertions(+), 2 deletions(-) > > > > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c > > b/arch/powerpc/platforms/powernv/pmem/ocxl.c > > index b8bd7e703b19..63109a870d2c 100644 > > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c > > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c > > @@ -10,6 +10,7 @@ > > #include <misc/ocxl.h> > > #include <linux/delay.h> > > #include <linux/ndctl.h> > > +#include <linux/fs.h> > > #include <linux/mm_types.h> > > #include <linux/memory_hotplug.h> > > #include "ocxl_internal.h" > > @@ -339,6 +340,9 @@ static void free_ocxlpmem(struct ocxlpmem > > *ocxlpmem) > > > > free_minor(ocxlpmem); > > > > + if (ocxlpmem->cdev.owner) > > + cdev_del(&ocxlpmem->cdev); > > + > > if (ocxlpmem->metadata_addr) > > devm_memunmap(&ocxlpmem->dev, ocxlpmem->metadata_addr); > > > > @@ -396,6 +400,70 @@ static int ocxlpmem_register(struct ocxlpmem > > *ocxlpmem) > > return device_register(&ocxlpmem->dev); > > } > > > > +static void ocxlpmem_put(struct ocxlpmem *ocxlpmem) > > +{ > > + put_device(&ocxlpmem->dev); > > +} > > + > > +static struct ocxlpmem *ocxlpmem_get(struct ocxlpmem *ocxlpmem) > > +{ > > + return (get_device(&ocxlpmem->dev) == NULL) ? NULL : ocxlpmem; > > +} > > + > > +static struct ocxlpmem *find_and_get_ocxlpmem(dev_t devno) > > +{ > > + struct ocxlpmem *ocxlpmem; > > + int minor = MINOR(devno); > > + /* > > + * We don't declare an RCU critical section here, as our AFU > > + * is protected by a reference counter on the device. By the > > time the > > + * minor number of a device is removed from the idr, the ref > > count of > > + * the device is already at 0, so no user API will access that > > AFU and > > + * this function can't return it. > > + */ > > I fixed something related in the ocxl driver (which had enough > changes > with the introduction of the "info" device to make a similar comment > become wrong). See commit a58d37bce0d21. The issue is handling a > simultaneous open() and removal of the device through /sysfs as best > we can. > > We are on a file open path and it's not like we're going to have a > thousand clients, so performance is not that critical. We can take > the > mutex before searching in the IDR and release it after we increment > the > reference count on the device. > But that's not enough: we could still find the device in the IDR > while > it is being removed in free_ocxlpmem(). I believe the only safe way > to > address it is by removing the user-facing APIs (the char device) > before > calling device_unregister(). So that it's not possible to find the > device in file_open() if it's in the middle of being removed. > > Fred > > Ok, I'll replicate that patch & follow your advice. > > + ocxlpmem = idr_find(&minors_idr, minor); > > + if (ocxlpmem) > > + ocxlpmem_get(ocxlpmem); > > + return ocxlpmem; > > +} > > + > > +static int file_open(struct inode *inode, struct file *file) > > +{ > > + struct ocxlpmem *ocxlpmem; > > + > > + ocxlpmem = find_and_get_ocxlpmem(inode->i_rdev); > > + if (!ocxlpmem) > > + return -ENODEV; > > + > > + file->private_data = ocxlpmem; > > + return 0; > > +} > > + > > +static int file_release(struct inode *inode, struct file *file) > > +{ > > + struct ocxlpmem *ocxlpmem = file->private_data; > > + > > + ocxlpmem_put(ocxlpmem); > > + return 0; > > +} > > + > > +static const struct file_operations fops = { > > + .owner = THIS_MODULE, > > + .open = file_open, > > + .release = file_release, > > +}; > > + > > +/** > > + * create_cdev() - Create the chardev in /dev for the device > > + * @ocxlpmem: the SCM metadata > > + * Return: 0 on success, negative on failure > > + */ > > +static int create_cdev(struct ocxlpmem *ocxlpmem) > > +{ > > + cdev_init(&ocxlpmem->cdev, &fops); > > + return cdev_add(&ocxlpmem->cdev, ocxlpmem->dev.devt, 1); > > +} > > + > > /** > > * ocxlpmem_remove() - Free an OpenCAPI persistent memory device > > * @pdev: the PCI device information struct > > @@ -572,6 +640,11 @@ static int probe(struct pci_dev *pdev, const > > struct pci_device_id *ent) > > goto err; > > } > > > > + if (create_cdev(ocxlpmem)) { > > + dev_err(&pdev->dev, "Could not create character > > device\n"); > > + goto err; > > + } > > As already mentioned in a previous patch, we branch to the err label > so > rc needs to be set to a valid error. > Ok > > > > + > > elapsed = 0; > > timeout = ocxlpmem->readiness_timeout + ocxlpmem- > > >memory_available_timeout; > > while (!is_usable(ocxlpmem, false)) { > > @@ -613,20 +686,59 @@ static struct pci_driver pci_driver = { > > .shutdown = ocxlpmem_remove, > > }; > > > > +static int file_init(void) > > +{ > > + int rc; > > + > > + mutex_init(&minors_idr_lock); > > + idr_init(&minors_idr); > > + > > + rc = alloc_chrdev_region(&ocxlpmem_dev, 0, NUM_MINORS, "ocxl- > > pmem"); > > + if (rc) { > > + idr_destroy(&minors_idr); > > + pr_err("Unable to allocate OpenCAPI persistent memory > > major number: %d\n", rc); > > + return rc; > > + } > > + > > + ocxlpmem_class = class_create(THIS_MODULE, "ocxl-pmem"); > > + if (IS_ERR(ocxlpmem_class)) { > > + idr_destroy(&minors_idr); > > + pr_err("Unable to create ocxl-pmem class\n"); > > + unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS); > > + return PTR_ERR(ocxlpmem_class); > > + } > > + > > + return 0; > > +} > > + > > +static void file_exit(void) > > +{ > > + class_destroy(ocxlpmem_class); > > + unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS); > > + idr_destroy(&minors_idr); > > +} > > + > > static int __init ocxlpmem_init(void) > > { > > - int rc = 0; > > + int rc; > > > > - rc = pci_register_driver(&pci_driver); > > + rc = file_init(); > > if (rc) > > return rc; > > > > + rc = pci_register_driver(&pci_driver); > > + if (rc) { > > + file_exit(); > > + return rc; > > + } > > + > > return 0; > > } > > > > static void ocxlpmem_exit(void) > > { > > pci_unregister_driver(&pci_driver); > > + file_exit(); > > } > > > > module_init(ocxlpmem_init); > > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h > > b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h > > index 28e2020f6355..d2d81fec7bb1 100644 > > --- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h > > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h > > @@ -2,6 +2,7 @@ > > // Copyright 2019 IBM Corp. > > > > #include <linux/pci.h> > > +#include <linux/cdev.h> > > #include <misc/ocxl.h> > > #include <linux/libnvdimm.h> > > #include <linux/mm.h> > > @@ -99,6 +100,7 @@ struct ocxlpmem_function0 { > > struct ocxlpmem { > > struct device dev; > > struct pci_dev *pdev; > > + struct cdev cdev; > > struct ocxl_fn *ocxl_fn; > > struct nd_interleave_set nd_set; > > struct nvdimm_bus_descriptor bus_desc; > > -- Alastair D'Silva Open Source Developer Linux Technology Centre, IBM Australia mob: 0423 762 819