Re: [PATCH v3 16/27] powerpc/powernv/pmem: Register a character device for userspace to interact with

From: Alastair D'Silva
Date: Wed Mar 04 2020 - 22:39:19 EST


On Tue, 2020-03-03 at 10:28 +0100, Frederic Barrat wrote:
>
> Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> > From: Alastair D'Silva <alastair@xxxxxxxxxxx>
> >
> > This patch introduces a character device (/dev/ocxl-scmX) which
> > further
> > patches will use to interact with userspace.
> >
> > Signed-off-by: Alastair D'Silva <alastair@xxxxxxxxxxx>
> > ---
> > arch/powerpc/platforms/powernv/pmem/ocxl.c | 116
> > +++++++++++++++++-
> > .../platforms/powernv/pmem/ocxl_internal.h | 2 +
> > 2 files changed, 116 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > index b8bd7e703b19..63109a870d2c 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > @@ -10,6 +10,7 @@
> > #include <misc/ocxl.h>
> > #include <linux/delay.h>
> > #include <linux/ndctl.h>
> > +#include <linux/fs.h>
> > #include <linux/mm_types.h>
> > #include <linux/memory_hotplug.h>
> > #include "ocxl_internal.h"
> > @@ -339,6 +340,9 @@ static void free_ocxlpmem(struct ocxlpmem
> > *ocxlpmem)
> >
> > free_minor(ocxlpmem);
> >
> > + if (ocxlpmem->cdev.owner)
> > + cdev_del(&ocxlpmem->cdev);
> > +
> > if (ocxlpmem->metadata_addr)
> > devm_memunmap(&ocxlpmem->dev, ocxlpmem->metadata_addr);
> >
> > @@ -396,6 +400,70 @@ static int ocxlpmem_register(struct ocxlpmem
> > *ocxlpmem)
> > return device_register(&ocxlpmem->dev);
> > }
> >
> > +static void ocxlpmem_put(struct ocxlpmem *ocxlpmem)
> > +{
> > + put_device(&ocxlpmem->dev);
> > +}
> > +
> > +static struct ocxlpmem *ocxlpmem_get(struct ocxlpmem *ocxlpmem)
> > +{
> > + return (get_device(&ocxlpmem->dev) == NULL) ? NULL : ocxlpmem;
> > +}
> > +
> > +static struct ocxlpmem *find_and_get_ocxlpmem(dev_t devno)
> > +{
> > + struct ocxlpmem *ocxlpmem;
> > + int minor = MINOR(devno);
> > + /*
> > + * We don't declare an RCU critical section here, as our AFU
> > + * is protected by a reference counter on the device. By the
> > time the
> > + * minor number of a device is removed from the idr, the ref
> > count of
> > + * the device is already at 0, so no user API will access that
> > AFU and
> > + * this function can't return it.
> > + */
>
> I fixed something related in the ocxl driver (which had enough
> changes
> with the introduction of the "info" device to make a similar comment
> become wrong). See commit a58d37bce0d21. The issue is handling a
> simultaneous open() and removal of the device through /sysfs as best
> we can.
>
> We are on a file open path and it's not like we're going to have a
> thousand clients, so performance is not that critical. We can take
> the
> mutex before searching in the IDR and release it after we increment
> the
> reference count on the device.
> But that's not enough: we could still find the device in the IDR
> while
> it is being removed in free_ocxlpmem(). I believe the only safe way
> to
> address it is by removing the user-facing APIs (the char device)
> before
> calling device_unregister(). So that it's not possible to find the
> device in file_open() if it's in the middle of being removed.
>
> Fred
>
>

Ok, I'll replicate that patch & follow your advice.

> > + ocxlpmem = idr_find(&minors_idr, minor);
> > + if (ocxlpmem)
> > + ocxlpmem_get(ocxlpmem);
> > + return ocxlpmem;
> > +}
> > +
> > +static int file_open(struct inode *inode, struct file *file)
> > +{
> > + struct ocxlpmem *ocxlpmem;
> > +
> > + ocxlpmem = find_and_get_ocxlpmem(inode->i_rdev);
> > + if (!ocxlpmem)
> > + return -ENODEV;
> > +
> > + file->private_data = ocxlpmem;
> > + return 0;
> > +}
> > +
> > +static int file_release(struct inode *inode, struct file *file)
> > +{
> > + struct ocxlpmem *ocxlpmem = file->private_data;
> > +
> > + ocxlpmem_put(ocxlpmem);
> > + return 0;
> > +}
> > +
> > +static const struct file_operations fops = {
> > + .owner = THIS_MODULE,
> > + .open = file_open,
> > + .release = file_release,
> > +};
> > +
> > +/**
> > + * create_cdev() - Create the chardev in /dev for the device
> > + * @ocxlpmem: the SCM metadata
> > + * Return: 0 on success, negative on failure
> > + */
> > +static int create_cdev(struct ocxlpmem *ocxlpmem)
> > +{
> > + cdev_init(&ocxlpmem->cdev, &fops);
> > + return cdev_add(&ocxlpmem->cdev, ocxlpmem->dev.devt, 1);
> > +}
> > +
> > /**
> > * ocxlpmem_remove() - Free an OpenCAPI persistent memory device
> > * @pdev: the PCI device information struct
> > @@ -572,6 +640,11 @@ static int probe(struct pci_dev *pdev, const
> > struct pci_device_id *ent)
> > goto err;
> > }
> >
> > + if (create_cdev(ocxlpmem)) {
> > + dev_err(&pdev->dev, "Could not create character
> > device\n");
> > + goto err;
> > + }
>
> As already mentioned in a previous patch, we branch to the err label
> so
> rc needs to be set to a valid error.
>

Ok

>
>
> > +
> > elapsed = 0;
> > timeout = ocxlpmem->readiness_timeout + ocxlpmem-
> > >memory_available_timeout;
> > while (!is_usable(ocxlpmem, false)) {
> > @@ -613,20 +686,59 @@ static struct pci_driver pci_driver = {
> > .shutdown = ocxlpmem_remove,
> > };
> >
> > +static int file_init(void)
> > +{
> > + int rc;
> > +
> > + mutex_init(&minors_idr_lock);
> > + idr_init(&minors_idr);
> > +
> > + rc = alloc_chrdev_region(&ocxlpmem_dev, 0, NUM_MINORS, "ocxl-
> > pmem");
> > + if (rc) {
> > + idr_destroy(&minors_idr);
> > + pr_err("Unable to allocate OpenCAPI persistent memory
> > major number: %d\n", rc);
> > + return rc;
> > + }
> > +
> > + ocxlpmem_class = class_create(THIS_MODULE, "ocxl-pmem");
> > + if (IS_ERR(ocxlpmem_class)) {
> > + idr_destroy(&minors_idr);
> > + pr_err("Unable to create ocxl-pmem class\n");
> > + unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> > + return PTR_ERR(ocxlpmem_class);
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static void file_exit(void)
> > +{
> > + class_destroy(ocxlpmem_class);
> > + unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> > + idr_destroy(&minors_idr);
> > +}
> > +
> > static int __init ocxlpmem_init(void)
> > {
> > - int rc = 0;
> > + int rc;
> >
> > - rc = pci_register_driver(&pci_driver);
> > + rc = file_init();
> > if (rc)
> > return rc;
> >
> > + rc = pci_register_driver(&pci_driver);
> > + if (rc) {
> > + file_exit();
> > + return rc;
> > + }
> > +
> > return 0;
> > }
> >
> > static void ocxlpmem_exit(void)
> > {
> > pci_unregister_driver(&pci_driver);
> > + file_exit();
> > }
> >
> > module_init(ocxlpmem_init);
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > index 28e2020f6355..d2d81fec7bb1 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > @@ -2,6 +2,7 @@
> > // Copyright 2019 IBM Corp.
> >
> > #include <linux/pci.h>
> > +#include <linux/cdev.h>
> > #include <misc/ocxl.h>
> > #include <linux/libnvdimm.h>
> > #include <linux/mm.h>
> > @@ -99,6 +100,7 @@ struct ocxlpmem_function0 {
> > struct ocxlpmem {
> > struct device dev;
> > struct pci_dev *pdev;
> > + struct cdev cdev;
> > struct ocxl_fn *ocxl_fn;
> > struct nd_interleave_set nd_set;
> > struct nvdimm_bus_descriptor bus_desc;
> >
--
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819