Re: [PATCH v6] uio: Fix uio driver to refcount device

From: Brian Russell
Date: Thu Mar 19 2015 - 10:49:35 EST




On 19/03/15 14:23, Greg Kroah-Hartman wrote:
> On Thu, Mar 19, 2015 at 02:11:19PM +0000, Brian Russell wrote:
>> Protect uio driver from its owner being unplugged while there are open fds.
>> Embed struct device in struct uio_device, use refcounting on device, free uio_device on release.
>> info struct passed in uio_register_device can be freed on unregister, so null out the field in
>> uio_unregister_device and check accesses.
>>
>> Signed-off-by: Brian Russell <brussell@xxxxxxxxxxx>
>> ---
>> drivers/uio/uio.c | 63 +++++++++++++++++++++++++++++++---------------
>> include/linux/uio_driver.h | 2 +-
>> 2 files changed, 44 insertions(+), 21 deletions(-)
>>
>> v6: ... and put them in the right place
>>
>> v5: add patch version changes
>>
>> v4: info struct passed in uio_register_device can be freed on unregister, so null
>> out the field in uio_unregister_device and check accesses.
>>
>> v3: Embed struct device in struct uio_device, use refcounting on device, free
>> uio_device on release.
>>
>> v2: Add blank line to header
>>
>> diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
>> index 6276f13..394cae0 100644
>> --- a/drivers/uio/uio.c
>> +++ b/drivers/uio/uio.c
>> @@ -270,7 +270,7 @@ static int uio_dev_add_attributes(struct uio_device *idev)
>> if (!map_found) {
>> map_found = 1;
>> idev->map_dir = kobject_create_and_add("maps",
>> - &idev->dev->kobj);
>> + &idev->dev.kobj);
>> if (!idev->map_dir)
>> goto err_map;
>> }
>> @@ -295,7 +295,7 @@ static int uio_dev_add_attributes(struct uio_device *idev)
>> if (!portio_found) {
>> portio_found = 1;
>> idev->portio_dir = kobject_create_and_add("portio",
>> - &idev->dev->kobj);
>> + &idev->dev.kobj);
>> if (!idev->portio_dir)
>> goto err_portio;
>> }
>> @@ -334,7 +334,7 @@ err_map_kobj:
>> kobject_put(&map->kobj);
>> }
>> kobject_put(idev->map_dir);
>> - dev_err(idev->dev, "error creating sysfs files (%d)\n", ret);
>> + dev_err(&idev->dev, "error creating sysfs files (%d)\n", ret);
>> return ret;
>> }
>>
>> @@ -371,7 +371,7 @@ static int uio_get_minor(struct uio_device *idev)
>> idev->minor = retval;
>> retval = 0;
>> } else if (retval == -ENOSPC) {
>> - dev_err(idev->dev, "too many uio devices\n");
>> + dev_err(&idev->dev, "too many uio devices\n");
>> retval = -EINVAL;
>> }
>> mutex_unlock(&minor_lock);
>> @@ -434,9 +434,11 @@ static int uio_open(struct inode *inode, struct file *filep)
>> goto out;
>> }
>>
>> + get_device(&idev->dev);
>> +
>> if (!try_module_get(idev->owner)) {
>> ret = -ENODEV;
>> - goto out;
>> + goto err_module_get;
>> }
>>
>> listener = kmalloc(sizeof(*listener), GFP_KERNEL);
>> @@ -462,6 +464,9 @@ err_infoopen:
>> err_alloc_listener:
>> module_put(idev->owner);
>>
>> +err_module_get:
>> + put_device(&idev->dev);
>> +
>> out:
>> return ret;
>> }
>> @@ -480,11 +485,12 @@ static int uio_release(struct inode *inode, struct file *filep)
>> struct uio_listener *listener = filep->private_data;
>> struct uio_device *idev = listener->dev;
>>
>> - if (idev->info->release)
>> + if (idev->info && idev->info->release)
>> ret = idev->info->release(idev->info, inode);
>>
>> module_put(idev->owner);
>> kfree(listener);
>> + put_device(&idev->dev);
>> return ret;
>> }
>>
>> @@ -493,7 +499,7 @@ static unsigned int uio_poll(struct file *filep, poll_table *wait)
>> struct uio_listener *listener = filep->private_data;
>> struct uio_device *idev = listener->dev;
>>
>> - if (!idev->info->irq)
>> + if (!idev->info || !idev->info->irq)
>> return -EIO;
>>
>> poll_wait(filep, &idev->wait, wait);
>> @@ -511,7 +517,7 @@ static ssize_t uio_read(struct file *filep, char __user *buf,
>> ssize_t retval;
>> s32 event_count;
>>
>> - if (!idev->info->irq)
>> + if (!idev->info || !idev->info->irq)
>> return -EIO;
>>
>> if (count != sizeof(s32))
>> @@ -559,7 +565,7 @@ static ssize_t uio_write(struct file *filep, const char __user *buf,
>> ssize_t retval;
>> s32 irq_on;
>>
>> - if (!idev->info->irq)
>> + if (!idev->info || !idev->info->irq)
>> return -EIO;
>>
>> if (count != sizeof(s32))
>> @@ -785,6 +791,13 @@ static void release_uio_class(void)
>> uio_major_cleanup();
>> }
>>
>> +static void uio_device_release(struct device *dev)
>> +{
>> + struct uio_device *idev = dev_get_drvdata(dev);
>> +
>> + kfree(idev);
>> +}
>> +
>> /**
>> * uio_register_device - register a new userspace IO device
>> * @owner: module that creates the new device
>> @@ -805,7 +818,7 @@ int __uio_register_device(struct module *owner,
>>
>> info->uio_dev = NULL;
>>
>> - idev = devm_kzalloc(parent, sizeof(*idev), GFP_KERNEL);
>> + idev = kzalloc(sizeof(*idev), GFP_KERNEL);
>> if (!idev) {
>> return -ENOMEM;
>> }
>> @@ -819,14 +832,19 @@ int __uio_register_device(struct module *owner,
>> if (ret)
>> return ret;
>>
>> - idev->dev = device_create(&uio_class, parent,
>> - MKDEV(uio_major, idev->minor), idev,
>> - "uio%d", idev->minor);
>> - if (IS_ERR(idev->dev)) {
>> - printk(KERN_ERR "UIO: device register failed\n");
>> - ret = PTR_ERR(idev->dev);
>> + idev->dev.devt = MKDEV(uio_major, idev->minor);
>> + idev->dev.class = &uio_class;
>> + idev->dev.parent = parent;
>> + idev->dev.release = uio_device_release;
>> + dev_set_drvdata(&idev->dev, idev);
>> +
>> + ret = kobject_set_name(&idev->dev.kobj, "uio%d", idev->minor);
>
> You are working with a device, why call a kobject function? Please use
> dev_set_name().

Ok.

>
>> + if (ret)
>> + goto err_device_create;
>> +
>> + ret = device_register(&idev->dev);
>> + if (ret)
>> goto err_device_create;
>> - }
>>
>> ret = uio_dev_add_attributes(idev);
>> if (ret)
>> @@ -835,7 +853,7 @@ int __uio_register_device(struct module *owner,
>> info->uio_dev = idev;
>>
>> if (info->irq && (info->irq != UIO_IRQ_CUSTOM)) {
>> - ret = devm_request_irq(idev->dev, info->irq, uio_interrupt,
>> + ret = request_irq(info->irq, uio_interrupt,
>> info->irq_flags, info->name, idev);
>
> Why move this away from the device lifecycle?
>

I ran into a problem here: after device unregister the parent module (in my case igb_uio in DPDK) can call pci_disable_msi. From the PCI MSI how to:

"Before calling this function, a device driver must always call free_irq()
on any interrupt for which it previously called request_irq().
Failure to do so results in a BUG_ON(), leaving the device with
MSI enabled and thus leaking its vector."

So I need the free_irq, but the device may still have open fds and therefore be around a bit longer waiting for them to be released.

>> if (ret)
>> goto err_request_irq;
>> @@ -846,7 +864,10 @@ int __uio_register_device(struct module *owner,
>> err_request_irq:
>> uio_dev_del_attributes(idev);
>> err_uio_dev_add_attributes:
>> - device_destroy(&uio_class, MKDEV(uio_major, idev->minor));
>> + uio_free_minor(idev);
>> + device_unregister(&idev->dev);
>> + return ret;
>
> This doesn't make sense here, shouldn't these just line up and allow the
> error path to fall though?
>

Ok.

>> err_device_create:
>> uio_free_minor(idev);
>> return ret;
>> @@ -871,8 +892,10 @@ void uio_unregister_device(struct uio_info *info)
>>
>> uio_dev_del_attributes(idev);
>>
>> - device_destroy(&uio_class, MKDEV(uio_major, idev->minor));
>> + device_unregister(&idev->dev);
>> + free_irq(idev->info->irq, idev);
>>
>> + idev->info = NULL;
>
> Why? If this was the last reference count, isn't idev now gone? You
> shouldn't be referencing it anymore.
>

No, the device can be unregistered here but still have outstanding refcount because of open fds. The count can get to zero in the release function, so idev could still be around after this but the calling module could free info.

>> return;
>> }
>> EXPORT_SYMBOL_GPL(uio_unregister_device);
>> diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
>> index 32c0e83..a11feeb 100644
>> --- a/include/linux/uio_driver.h
>> +++ b/include/linux/uio_driver.h
>> @@ -65,7 +65,7 @@ struct uio_port {
>>
>> struct uio_device {
>> struct module *owner;
>> - struct device *dev;
>> + struct device dev;
>
> I like this change, I just think some of the details above aren't quite
> right.
>
> thanks,
>
> greg k-h
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/