[PATCH v2] platform/chrome: cros_ec: Use per-device lockdep key

From: Chen-Yu Tsai
Date: Wed Jan 11 2023 - 02:41:58 EST


Lockdep reports a bogus possible deadlock on MT8192 Chromebooks due to
the following lock sequences:

1. lock(i2c_register_adapter) [1]; lock(&ec_dev->lock)
2. lock(&ec_dev->lock); lock(prepare_lock);

The actual dependency chains are much longer. The shortened version
looks somewhat like:

1. cros-ec-rpmsg on mtk-scp
ec_dev->lock -> prepare_lock
2. In rt5682_i2c_probe() on native I2C bus:
prepare_lock -> regmap->lock -> (possibly) i2c_adapter->bus_lock
3. In rt5682_i2c_probe() on native I2C bus:
regmap->lock -> i2c_adapter->bus_lock
4. In sbs_probe() on i2c-cros-ec-tunnel I2C bus attached on cros-ec:
i2c_adapter->bus_lock -> ec_dev->lock

While lockdep is correct that the shared lockdep classes have a circular
dependency, it is bogus because

a) 2+3 happen on a native I2C bus
b) 4 happens on the actual EC on ChromeOS devices
c) 1 happens on the SCP coprocessor on MediaTek Chromebooks that just
happens to expose a cros-ec interface, but does not have an
i2c-cros-ec-tunnel I2C bus

In short, the "dependencies" are actually on different devices.

Setup a per-device lockdep key for cros_ec devices so lockdep can tell
the two instances apart. This helps with getting rid of the bogus
lockdep warning. For ChromeOS devices that only have one cros-ec
instance this doesn't change anything.

Also add a missing mutex_destroy, just to make the teardown complete.

[1] This is likely the per I2C bus lock with shared lockdep class

Signed-off-by: Chen-Yu Tsai <wenst@xxxxxxxxxxxx>
---
Changes since v1:
- Changed subject prefix from "chromeos" to "chrome"
- Changed "passthrough I2C bus" to exact name, i2c-cros-ec-tunnel
- Added kerneldoc for new "lockdep_key" field

drivers/platform/chrome/cros_ec.c | 14 +++++++++++---
include/linux/platform_data/cros_ec_proto.h | 4 ++++
2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/platform/chrome/cros_ec.c b/drivers/platform/chrome/cros_ec.c
index ec733f683f34..4ae57820afd5 100644
--- a/drivers/platform/chrome/cros_ec.c
+++ b/drivers/platform/chrome/cros_ec.c
@@ -198,12 +198,14 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
if (!ec_dev->dout)
return -ENOMEM;

+ lockdep_register_key(&ec_dev->lockdep_key);
mutex_init(&ec_dev->lock);
+ lockdep_set_class(&ec_dev->lock, &ec_dev->lockdep_key);

err = cros_ec_query_all(ec_dev);
if (err) {
dev_err(dev, "Cannot identify the EC: error %d\n", err);
- return err;
+ goto destroy_mutex;
}

if (ec_dev->irq > 0) {
@@ -215,7 +217,7 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
if (err) {
dev_err(dev, "Failed to request IRQ %d: %d\n",
ec_dev->irq, err);
- return err;
+ goto destroy_mutex;
}
}

@@ -226,7 +228,8 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
if (IS_ERR(ec_dev->ec)) {
dev_err(ec_dev->dev,
"Failed to create CrOS EC platform device\n");
- return PTR_ERR(ec_dev->ec);
+ err = PTR_ERR(ec_dev->ec);
+ goto destroy_mutex;
}

if (ec_dev->max_passthru) {
@@ -292,6 +295,9 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
exit:
platform_device_unregister(ec_dev->ec);
platform_device_unregister(ec_dev->pd);
+destroy_mutex:
+ mutex_destroy(&ec_dev->lock);
+ lockdep_unregister_key(&ec_dev->lockdep_key);
return err;
}
EXPORT_SYMBOL(cros_ec_register);
@@ -309,6 +315,8 @@ void cros_ec_unregister(struct cros_ec_device *ec_dev)
if (ec_dev->pd)
platform_device_unregister(ec_dev->pd);
platform_device_unregister(ec_dev->ec);
+ mutex_destroy(&ec_dev->lock);
+ lockdep_unregister_key(&ec_dev->lockdep_key);
}
EXPORT_SYMBOL(cros_ec_unregister);

diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
index 017d502ed66e..3db26c891d5c 100644
--- a/include/linux/platform_data/cros_ec_proto.h
+++ b/include/linux/platform_data/cros_ec_proto.h
@@ -9,6 +9,7 @@
#define __LINUX_CROS_EC_PROTO_H

#include <linux/device.h>
+#include <linux/lockdep_types.h>
#include <linux/mutex.h>
#include <linux/notifier.h>

@@ -122,6 +123,8 @@ struct cros_ec_command {
* command. The caller should check msg.result for the EC's result
* code.
* @pkt_xfer: Send packet to EC and get response.
+ * @lockdep_key: Lockdep class for each instance. Unused if CONFIG_LOCKDEP is
+ * not enabled.
* @lock: One transaction at a time.
* @mkbp_event_supported: 0 if MKBP not supported. Otherwise its value is
* the maximum supported version of the MKBP host event
@@ -166,6 +169,7 @@ struct cros_ec_device {
struct cros_ec_command *msg);
int (*pkt_xfer)(struct cros_ec_device *ec,
struct cros_ec_command *msg);
+ struct lock_class_key lockdep_key;
struct mutex lock;
u8 mkbp_event_supported;
bool host_sleep_v1;
--
2.39.0.314.g84b9a713c41-goog