Re: [PATCH v3 16/27] powerpc/powernv/pmem: Register a character device for userspace to interact with

From: Frederic Barrat
Date: Tue Mar 03 2020 - 04:28:29 EST




Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
From: Alastair D'Silva <alastair@xxxxxxxxxxx>

This patch introduces a character device (/dev/ocxl-scmX) which further
patches will use to interact with userspace.

Signed-off-by: Alastair D'Silva <alastair@xxxxxxxxxxx>
---
arch/powerpc/platforms/powernv/pmem/ocxl.c | 116 +++++++++++++++++-
.../platforms/powernv/pmem/ocxl_internal.h | 2 +
2 files changed, 116 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
index b8bd7e703b19..63109a870d2c 100644
--- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
+++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
@@ -10,6 +10,7 @@
#include <misc/ocxl.h>
#include <linux/delay.h>
#include <linux/ndctl.h>
+#include <linux/fs.h>
#include <linux/mm_types.h>
#include <linux/memory_hotplug.h>
#include "ocxl_internal.h"
@@ -339,6 +340,9 @@ static void free_ocxlpmem(struct ocxlpmem *ocxlpmem)
free_minor(ocxlpmem);
+ if (ocxlpmem->cdev.owner)
+ cdev_del(&ocxlpmem->cdev);
+
if (ocxlpmem->metadata_addr)
devm_memunmap(&ocxlpmem->dev, ocxlpmem->metadata_addr);
@@ -396,6 +400,70 @@ static int ocxlpmem_register(struct ocxlpmem *ocxlpmem)
return device_register(&ocxlpmem->dev);
}
+static void ocxlpmem_put(struct ocxlpmem *ocxlpmem)
+{
+ put_device(&ocxlpmem->dev);
+}
+
+static struct ocxlpmem *ocxlpmem_get(struct ocxlpmem *ocxlpmem)
+{
+ return (get_device(&ocxlpmem->dev) == NULL) ? NULL : ocxlpmem;
+}
+
+static struct ocxlpmem *find_and_get_ocxlpmem(dev_t devno)
+{
+ struct ocxlpmem *ocxlpmem;
+ int minor = MINOR(devno);
+ /*
+ * We don't declare an RCU critical section here, as our AFU
+ * is protected by a reference counter on the device. By the time the
+ * minor number of a device is removed from the idr, the ref count of
+ * the device is already at 0, so no user API will access that AFU and
+ * this function can't return it.
+ */


I fixed something related in the ocxl driver (which had changed enough, with the introduction of the "info" device, to make a similar comment become wrong). See commit a58d37bce0d21. The issue is handling a simultaneous open() and removal of the device through sysfs as best we can.

We are on a file open path and it's not like we're going to have a thousand clients, so performance is not that critical. We can take the mutex before searching in the IDR and release it after we increment the reference count on the device.
But that's not enough: we could still find the device in the IDR while it is being removed in free_ocxlpmem(). I believe the only safe way to address it is by removing the user-facing APIs (the char device) before calling device_unregister(). So that it's not possible to find the device in file_open() if it's in the middle of being removed.

Fred


+ ocxlpmem = idr_find(&minors_idr, minor);
+ if (ocxlpmem)
+ ocxlpmem_get(ocxlpmem);
+ return ocxlpmem;
+}
+
+static int file_open(struct inode *inode, struct file *file)
+{
+ struct ocxlpmem *ocxlpmem;
+
+ ocxlpmem = find_and_get_ocxlpmem(inode->i_rdev);
+ if (!ocxlpmem)
+ return -ENODEV;
+
+ file->private_data = ocxlpmem;
+ return 0;
+}
+
+static int file_release(struct inode *inode, struct file *file)
+{
+ struct ocxlpmem *ocxlpmem = file->private_data;
+
+ ocxlpmem_put(ocxlpmem);
+ return 0;
+}
+
+static const struct file_operations fops = {
+ .owner = THIS_MODULE,
+ .open = file_open,
+ .release = file_release,
+};
+
+/**
+ * create_cdev() - Create the chardev in /dev for the device
+ * @ocxlpmem: the SCM metadata
+ * Return: 0 on success, negative on failure
+ */
+static int create_cdev(struct ocxlpmem *ocxlpmem)
+{
+ cdev_init(&ocxlpmem->cdev, &fops);
+ return cdev_add(&ocxlpmem->cdev, ocxlpmem->dev.devt, 1);
+}
+
/**
* ocxlpmem_remove() - Free an OpenCAPI persistent memory device
* @pdev: the PCI device information struct
@@ -572,6 +640,11 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err;
}
+ if (create_cdev(ocxlpmem)) {
+ dev_err(&pdev->dev, "Could not create character device\n");
+ goto err;
+ }


As already mentioned in a previous patch, we branch to the err label so rc needs to be set to a valid error.



+
elapsed = 0;
timeout = ocxlpmem->readiness_timeout + ocxlpmem->memory_available_timeout;
while (!is_usable(ocxlpmem, false)) {
@@ -613,20 +686,59 @@ static struct pci_driver pci_driver = {
.shutdown = ocxlpmem_remove,
};
+static int file_init(void)
+{
+ int rc;
+
+ mutex_init(&minors_idr_lock);
+ idr_init(&minors_idr);
+
+ rc = alloc_chrdev_region(&ocxlpmem_dev, 0, NUM_MINORS, "ocxl-pmem");
+ if (rc) {
+ idr_destroy(&minors_idr);
+ pr_err("Unable to allocate OpenCAPI persistent memory major number: %d\n", rc);
+ return rc;
+ }
+
+ ocxlpmem_class = class_create(THIS_MODULE, "ocxl-pmem");
+ if (IS_ERR(ocxlpmem_class)) {
+ idr_destroy(&minors_idr);
+ pr_err("Unable to create ocxl-pmem class\n");
+ unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
+ return PTR_ERR(ocxlpmem_class);
+ }
+
+ return 0;
+}
+
+static void file_exit(void)
+{
+ class_destroy(ocxlpmem_class);
+ unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
+ idr_destroy(&minors_idr);
+}
+
static int __init ocxlpmem_init(void)
{
- int rc = 0;
+ int rc;
- rc = pci_register_driver(&pci_driver);
+ rc = file_init();
if (rc)
return rc;
+ rc = pci_register_driver(&pci_driver);
+ if (rc) {
+ file_exit();
+ return rc;
+ }
+
return 0;
}
static void ocxlpmem_exit(void)
{
pci_unregister_driver(&pci_driver);
+ file_exit();
}
module_init(ocxlpmem_init);
diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
index 28e2020f6355..d2d81fec7bb1 100644
--- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
+++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
@@ -2,6 +2,7 @@
// Copyright 2019 IBM Corp.
#include <linux/pci.h>
+#include <linux/cdev.h>
#include <misc/ocxl.h>
#include <linux/libnvdimm.h>
#include <linux/mm.h>
@@ -99,6 +100,7 @@ struct ocxlpmem_function0 {
struct ocxlpmem {
struct device dev;
struct pci_dev *pdev;
+ struct cdev cdev;
struct ocxl_fn *ocxl_fn;
struct nd_interleave_set nd_set;
struct nvdimm_bus_descriptor bus_desc;