Re: [PATCH] pci-sysfs: fix a potential deadlock when concurrent remove&rescanpci device via sysfs

From: Gu Zheng
Date: Wed Oct 30 2013 - 21:28:20 EST


Hi Bjorn,

On 10/31/2013 01:19 AM, Bjorn Helgaas wrote:

> On Wed, Oct 30, 2013 at 06:12:27PM +0800, Gu Zheng wrote:
>> There is a potential deadlock situation when we manipulate pci device
>> (remove&rescan) via the pci-sysfs user interfaces simultaneously.
>>
>> Privious patch:
>> https://lkml.org/lkml/2012/12/27/10
>>
>> Related bug reports on bugzilla:
>> https://bugzilla.kernel.org/show_bug.cgi?id=60672
>> https://bugzilla.kernel.org/show_bug.cgi?id=60695
>>
>> deadlock info described as following:
<...>
>>
>> path1: sysfs remove device: | path2: sysfs rescan device:
>> sysfs_schedule_callback_work() | sysfs_write_file()
>> remove_callback() | flush_write_buffer()
>> *1* mutex_lock(&pci_remove_rescan_mutex)|*2* sysfs_get_active(attr_sd)
>> ... | dev_attr_store()
>> device_remove_file() | dev_rescan_store()
>> ... |*4* mutex_lock(&pci_remove_rescan_mutex)
>> *3* sysfs_deactivate(sd) | ...
>> wait_for_completion() |*5* sysfs_put_active(attr_sd)
>> *6* mutex_unlock(&pci_remove_rescan_mutex)
>>
>> If path1 first holds the pci_remove_rescan_mutex at *1*, then another path
>> called path2 actived and runs to *2* before path1 runs to *3*, we now runs
>> to a deadlock situation:
>> Path1 holds the mutex waiting path2 to decrease sysfs_dirent's s_active
>> counter at *5*, but path2 is blocked at *4* when trying to get the
>> pci_remove_rescan_mutex. The mutex won't be put by path1 until it reach
>> *6*, but it's now blocked at *3*.
>>
>> So we use mutex_try_lock to avoid this deadlock, and additional wait/restart_syscall
>> steps are used to make the fail path of try_lock more friendly to user space task.
>>
>> Signed-off-by: Gu Zheng <guz.fnst@xxxxxxxxxxxxxx>
>> ---
>> drivers/pci/pci-sysfs.c | 48 +++++++++++++++++++++++++++++++++++++++-------
>> 1 files changed, 40 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
>> index 7128cfd..0dac6f4 100644
>> --- a/drivers/pci/pci-sysfs.c
>> +++ b/drivers/pci/pci-sysfs.c
>> @@ -29,6 +29,7 @@
>> #include <linux/slab.h>
>> #include <linux/vgaarb.h>
>> #include <linux/pm_runtime.h>
>> +#include <linux/delay.h>
>> #include "pci.h"
>>
>> static int sysfs_initialized; /* = 0 */
>> @@ -285,6 +286,25 @@ msi_bus_store(struct device *dev, struct device_attribute *attr,
>> }
>>
>> static DEFINE_MUTEX(pci_remove_rescan_mutex);
>> +
>> +static void pci_remove_rescan_lock(void)
>> +{
>> + mutex_lock(&pci_remove_rescan_mutex);
>> +}
>> +
>> +static int pci_remove_rescan_lock_sysfs(void)
>> +{
>> + if (mutex_trylock(&pci_remove_rescan_mutex))
>> + return 0;
>> + /* Avoid busy looping (20 ms of sleep should do). */
>> + msleep(20);
>> + return restart_syscall();
>
> There are very few uses of restart_syscall(). I don't believe this
> situation is so unusual and special that we need to use it here.

What about queuing rescan routines into sysfs wrokqueue just like the
remove ones? I remember that your also mentioned this way in the previous
patch threads.

Regards,
Gu

>
>> +}
>> +
>> +static void pci_remove_rescan_unlock(void)
>> +{
>> + mutex_unlock(&pci_remove_rescan_mutex);
>> +}
>> static ssize_t bus_rescan_store(struct bus_type *bus, const char *buf,
>> size_t count)
>> {
>> @@ -295,10 +315,14 @@ static ssize_t bus_rescan_store(struct bus_type *bus, const char *buf,
>> return -EINVAL;
>>
>> if (val) {
>> - mutex_lock(&pci_remove_rescan_mutex);
>> + int ret;
>> +
>> + ret = pci_remove_rescan_lock_sysfs();
>> + if (ret)
>> + return ret;
>> while ((b = pci_find_next_bus(b)) != NULL)
>> pci_rescan_bus(b);
>> - mutex_unlock(&pci_remove_rescan_mutex);
>> + pci_remove_rescan_unlock();
>> }
>> return count;
>> }
>> @@ -319,9 +343,13 @@ dev_rescan_store(struct device *dev, struct device_attribute *attr,
>> return -EINVAL;
>>
>> if (val) {
>> - mutex_lock(&pci_remove_rescan_mutex);
>> + int ret;
>> +
>> + ret = pci_remove_rescan_lock_sysfs();
>> + if (ret)
>> + return ret;
>> pci_rescan_bus(pdev->bus);
>> - mutex_unlock(&pci_remove_rescan_mutex);
>> + pci_remove_rescan_unlock();
>> }
>> return count;
>> }
>> @@ -332,9 +360,9 @@ static void remove_callback(struct device *dev)
>> {
>> struct pci_dev *pdev = to_pci_dev(dev);
>>
>> - mutex_lock(&pci_remove_rescan_mutex);
>> + pci_remove_rescan_lock();
>> pci_stop_and_remove_bus_device(pdev);
>> - mutex_unlock(&pci_remove_rescan_mutex);
>> + pci_remove_rescan_unlock();
>> }
>>
>> static ssize_t
>> @@ -370,12 +398,16 @@ dev_bus_rescan_store(struct device *dev, struct device_attribute *attr,
>> return -EINVAL;
>>
>> if (val) {
>> - mutex_lock(&pci_remove_rescan_mutex);
>> + int ret;
>> +
>> + ret = pci_remove_rescan_lock_sysfs();
>> + if (ret)
>> + return ret;
>> if (!pci_is_root_bus(bus) && list_empty(&bus->devices))
>> pci_rescan_bus_bridge_resize(bus->self);
>> else
>> pci_rescan_bus(bus);
>> - mutex_unlock(&pci_remove_rescan_mutex);
>> + pci_remove_rescan_unlock();
>> }
>> return count;
>> }
>> --
>> 1.7.7
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
>> the body of a message to majordomo@xxxxxxxxxxxxxxx
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/