Re: [PATCH v3 1/2] scsi: ufs: Probe for temperature notification support

From: Guenter Roeck
Date: Sun Sep 12 2021 - 11:38:10 EST


On 9/12/21 6:19 AM, Avri Altman wrote:
Probe the dExtendedUFSFeaturesSupport register for the device's
temperature notification support, and if supported, add a hw monitor
device.

Signed-off-by: Avri Altman <avri.altman@xxxxxxx>
---
drivers/scsi/ufs/Kconfig | 10 ++
drivers/scsi/ufs/Makefile | 1 +
drivers/scsi/ufs/ufs-hwmon.c | 179 +++++++++++++++++++++++++++++++++++
drivers/scsi/ufs/ufs.h | 6 ++
drivers/scsi/ufs/ufshcd.c | 28 ++++++
drivers/scsi/ufs/ufshcd.h | 18 ++++
6 files changed, 242 insertions(+)
create mode 100644 drivers/scsi/ufs/ufs-hwmon.c

diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig
index 432df76e6318..b930f29fc375 100644
--- a/drivers/scsi/ufs/Kconfig
+++ b/drivers/scsi/ufs/Kconfig
@@ -199,3 +199,13 @@ config SCSI_UFS_FAULT_INJECTION
help
Enable fault injection support in the UFS driver. This makes it easier
to test the UFS error handler and abort handler.
+
+config SCSI_UFS_HWMON
+ bool "UFS Temperature Notification"
+ depends on SCSI_UFSHCD && HWMON
+ help
+ This provides support for UFS hardware monitoring. If enabled,
+ a hardware monitoring device will be created for the UFS device.
+
+ If unsure, say N.
+

git complains about blank line at EOF.

diff --git a/drivers/scsi/ufs/Makefile b/drivers/scsi/ufs/Makefile
index c407da9b5171..966048875b50 100644
--- a/drivers/scsi/ufs/Makefile
+++ b/drivers/scsi/ufs/Makefile
@@ -10,6 +10,7 @@ ufshcd-core-$(CONFIG_SCSI_UFS_BSG) += ufs_bsg.o
ufshcd-core-$(CONFIG_SCSI_UFS_CRYPTO) += ufshcd-crypto.o
ufshcd-core-$(CONFIG_SCSI_UFS_HPB) += ufshpb.o
ufshcd-core-$(CONFIG_SCSI_UFS_FAULT_INJECTION) += ufs-fault-injection.o
+ufshcd-core-$(CONFIG_SCSI_UFS_HWMON) += ufs-hwmon.o
obj-$(CONFIG_SCSI_UFS_DWC_TC_PCI) += tc-dwc-g210-pci.o ufshcd-dwc.o tc-dwc-g210.o
obj-$(CONFIG_SCSI_UFS_DWC_TC_PLATFORM) += tc-dwc-g210-pltfrm.o ufshcd-dwc.o tc-dwc-g210.o
diff --git a/drivers/scsi/ufs/ufs-hwmon.c b/drivers/scsi/ufs/ufs-hwmon.c
new file mode 100644
index 000000000000..a50e83f645f4
--- /dev/null
+++ b/drivers/scsi/ufs/ufs-hwmon.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * UFS hardware monitoring support
+ * Copyright (c) 2021, Western Digital Corporation
+ */
+
+#include <linux/hwmon.h>
+
+#include "ufshcd.h"
+
+struct ufs_hwmon_data {
+ struct ufs_hba *hba;
+ u8 mask;
+};
+
+static bool ufs_temp_enabled(struct ufs_hba *hba, u8 mask)
+{
+ u32 ee_mask;
+
+ if (ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_READ_ATTR,
+ QUERY_ATTR_IDN_EE_CONTROL, 0, 0, &ee_mask))
+ return false;
+
+ return (mask & ee_mask & MASK_EE_TOO_HIGH_TEMP) ||
+ (mask & ee_mask & MASK_EE_TOO_LOW_TEMP);
+}
+
+static bool ufs_temp_valid(struct ufs_hba *hba, u8 mask,
+ enum attr_idn idn, u32 value)
+{
+ return (idn == QUERY_ATTR_IDN_CASE_ROUGH_TEMP && value >= 1 &&
+ value <= 250 && ufs_temp_enabled(hba, mask)) ||
+ (idn == QUERY_ATTR_IDN_HIGH_TEMP_BOUND && value >= 100 &&
+ value <= 250) ||
+ (idn == QUERY_ATTR_IDN_LOW_TEMP_BOUND && value >= 1 &&
+ value <= 80);
+}
+
The value ranges checed above suggest that the temperature is reported
in degrees C (or maybe degrees C with an offset). The hwmon API expects
temperatures to be reported in milli-degrees C, and I don't see a
conversion in the actual read functions. What does the "sensors" command report ?

+static int ufs_get_temp(struct ufs_hba *hba, u8 mask, enum attr_idn idn)
+{
+ u32 value;
+
+ if (ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_READ_ATTR, idn, 0, 0,
+ &value))

checkpatch states that alignment is off, and I am quite sure this fits into one
line anyway (with the 100-column limit). There are more instances with bad alignment
according to checkpatch.

Also, ufshcd_query_attr() returns a valid Linux error code. That should be
returned to the caller and not be replaced. More on that below.

+ return 0;
+
+ if (ufs_temp_valid(hba, mask, idn, value))
+ return value - 80;
+

This again suggests that the temperature is not milli-degrees C.

Is there reason to believe that this validation is necessary ?
Note that this reports an "error" if the returned temperature value
happens to have a value of 80. Again, more on that below.

+ return 0;
+}
+
+static int ufs_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ struct ufs_hwmon_data *data = dev_get_drvdata(dev);
+ struct ufs_hba *hba = data->hba;
+ u8 mask = data->mask;
+ int err = 0;
+ bool get_temp = true;
+
+ if (type != hwmon_temp)
+ return 0;
+
+ down(&hba->host_sem);
+
+ if (!ufshcd_is_user_access_allowed(hba)) {
+ up(&hba->host_sem);
+ return -EBUSY;
+ }
+
+ ufshcd_rpm_get_sync(hba);
+
+ switch (attr) {
+ case hwmon_temp_enable:
+ *val = ufs_temp_enabled(hba, mask);
+ get_temp = false;
+

This seems to be read-only, and the mask only affects the limit registers as
far as I con see. If so, this is wrong: The mask should be used to enable
or hide the limit attributes as needed. If the mask is 0, and if this means
that the current temperature is not reported either, the driver should not
instantiate at all.

The "enable" attribute only makes sense if it can be used to actually enable
or disable a specific sensor, and is not tied to limit attributes but to the
actual sensor values.

+ break;
+ case hwmon_temp_max_alarm:
+ *val = ufs_get_temp(hba, mask, QUERY_ATTR_IDN_HIGH_TEMP_BOUND);
+
+ break;
+ case hwmon_temp_min_alarm:
+ *val = ufs_get_temp(hba, mask, QUERY_ATTR_IDN_LOW_TEMP_BOUND);
+
+ break;
+ case hwmon_temp_input:
+ *val = ufs_get_temp(hba, mask, QUERY_ATTR_IDN_CASE_ROUGH_TEMP);
+
If an enable attribute exists and is 0 (disabled), this should return -ENODATA.
In this case, that would imply that the driver should not be instantiated
in the first place since it has nothing to report.

+ break;
+ default:
+ err = -EOPNOTSUPP;
+
+ break;
+ }
+
+ ufshcd_rpm_put_sync(hba);
+
+ up(&hba->host_sem);
+
+ if (get_temp && !err && *val == 0)
+ err = -EINVAL;
+
That is an odd way of detection errors. If it was in the hwmon subsystem,
I'd ask for the error handling to be moved into the case statements. On
top of that, interpreting a return value of "0" as error seems wrong.
ufs_get_temp() returns 0 if the measured temperature or the reported limit
happens to have a value of 80, and that is perfectly valid. If ufs_get_temp()
reports an error, it should report that as error.

Also, EINVAL is "invalid argument", which I am quite sure does not apply here.

+ return err;
+}
+
+static umode_t ufs_hwmon_is_visible(const void *_data,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ if (type != hwmon_temp)
+ return 0;
+
+ switch (attr) {
+ case hwmon_temp_enable:
+ case hwmon_temp_max_alarm:
+ case hwmon_temp_min_alarm:
+ case hwmon_temp_input:
+ return 0444;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static const struct hwmon_channel_info *ufs_hwmon_info[] = {
+ HWMON_CHANNEL_INFO(temp, HWMON_T_ENABLE | HWMON_T_INPUT |
+ HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM),
+ NULL
+};
+
+static const struct hwmon_ops ufs_hwmon_ops = {
+ .is_visible = ufs_hwmon_is_visible,
+ .read = ufs_hwmon_read,
+};
+
+static const struct hwmon_chip_info ufs_hwmon_hba_info = {
+ .ops = &ufs_hwmon_ops,
+ .info = ufs_hwmon_info,
+};
+
+void ufs_hwmon_probe(struct ufs_hba *hba, u8 mask)
+{
+ struct device *dev = hba->dev;
+ struct ufs_hwmon_data *data;
+ struct device *hwmon;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return;
+
+ data->hba = hba;
+ data->mask = mask;
+
+ hwmon = hwmon_device_register_with_info(dev, "ufs",
+ data, &ufs_hwmon_hba_info,
+ NULL);
+ if (IS_ERR(hwmon)) {
+ dev_warn(dev, "Failed to instantiate hwmon device\n");
+ kfree(data);
+ return;
+ }
+
+ hba->hwmon_device = hwmon;
+}
+
+void ufs_hwmon_remove(struct ufs_hba *hba)
+{
+ struct ufs_hwmon_data *data;
+
+ if (!hba->hwmon_device)
+ return;
+
+ data = dev_get_drvdata(hba->hwmon_device);
+ hwmon_device_unregister(hba->hwmon_device);
+ hba->hwmon_device = NULL;
+ kfree(data);
+}
diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h
index 8c6b38b1b142..171b27be7b1d 100644
--- a/drivers/scsi/ufs/ufs.h
+++ b/drivers/scsi/ufs/ufs.h
@@ -152,6 +152,9 @@ enum attr_idn {
QUERY_ATTR_IDN_PSA_STATE = 0x15,
QUERY_ATTR_IDN_PSA_DATA_SIZE = 0x16,
QUERY_ATTR_IDN_REF_CLK_GATING_WAIT_TIME = 0x17,
+ QUERY_ATTR_IDN_CASE_ROUGH_TEMP = 0x18,
+ QUERY_ATTR_IDN_HIGH_TEMP_BOUND = 0x19,
+ QUERY_ATTR_IDN_LOW_TEMP_BOUND = 0x1A,
QUERY_ATTR_IDN_WB_FLUSH_STATUS = 0x1C,
QUERY_ATTR_IDN_AVAIL_WB_BUFF_SIZE = 0x1D,
QUERY_ATTR_IDN_WB_BUFF_LIFE_TIME_EST = 0x1E,
@@ -338,6 +341,9 @@ enum {
/* Possible values for dExtendedUFSFeaturesSupport */
enum {
+ UFS_DEV_LOW_TEMP_NOTIF = BIT(4),
+ UFS_DEV_HIGH_TEMP_NOTIF = BIT(5),
+ UFS_DEV_EXT_TEMP_NOTIF = BIT(6),
UFS_DEV_HPB_SUPPORT = BIT(7),
UFS_DEV_WRITE_BOOSTER_SUP = BIT(8),
};
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 67889d74761c..90c2e9677435 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -7469,6 +7469,31 @@ static void ufshcd_wb_probe(struct ufs_hba *hba, u8 *desc_buf)
hba->caps &= ~UFSHCD_CAP_WB_EN;
}
+static void ufshcd_temp_notif_probe(struct ufs_hba *hba, u8 *desc_buf)
+{
+ struct ufs_dev_info *dev_info = &hba->dev_info;
+ u32 ext_ufs_feature;
+ u8 mask = 0;
+
+ if (!(hba->caps & UFSHCD_CAP_TEMP_NOTIF) ||
+ dev_info->wspecversion < 0x300)

I am quite sure this fits a single line.

+ return;
+
+ ext_ufs_feature = get_unaligned_be32(desc_buf +
+ DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP);
+
+ if (ext_ufs_feature & UFS_DEV_LOW_TEMP_NOTIF)
+ mask |= MASK_EE_TOO_LOW_TEMP;
+
+ if (ext_ufs_feature & UFS_DEV_HIGH_TEMP_NOTIF)
+ mask |= MASK_EE_TOO_HIGH_TEMP;
+
+ if (mask) {
+ ufshcd_enable_ee(hba, mask);
+ ufs_hwmon_probe(hba, mask);
+ }
+}
+
void ufshcd_fixup_dev_quirks(struct ufs_hba *hba, struct ufs_dev_fix *fixups)
{
struct ufs_dev_fix *f;
@@ -7564,6 +7589,8 @@ static int ufs_get_device_desc(struct ufs_hba *hba)
ufshcd_wb_probe(hba, desc_buf);
+ ufshcd_temp_notif_probe(hba, desc_buf);
+
/*
* ufshcd_read_string_desc returns size of the string
* reset the error value
@@ -9408,6 +9435,7 @@ void ufshcd_remove(struct ufs_hba *hba)
{
if (hba->sdev_ufs_device)
ufshcd_rpm_get_sync(hba);
+ ufs_hwmon_remove(hba);
ufs_bsg_remove(hba);
ufshpb_remove(hba);
ufs_sysfs_remove_nodes(hba->dev);
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 4723f27a55d1..798a408d71e5 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -653,6 +653,12 @@ enum ufshcd_caps {
* in order to exit DeepSleep state.
*/
UFSHCD_CAP_DEEPSLEEP = 1 << 10,
+
+ /*
+ * This capability allows the host controller driver to use temperature
+ * notification if it is supported by the UFS device.
+ */
+ UFSHCD_CAP_TEMP_NOTIF = 1 << 11,
};
struct ufs_hba_variant_params {
@@ -789,6 +795,10 @@ struct ufs_hba {
struct scsi_device *sdev_ufs_device;
struct scsi_device *sdev_rpmb;
+#ifdef CONFIG_SCSI_UFS_HWMON
+ struct device *hwmon_device;
+#endif
+
enum ufs_dev_pwr_mode curr_dev_pwr_mode;
enum uic_link_state uic_link_state;
/* Desired UFS power management level during runtime PM */
@@ -1050,6 +1060,14 @@ static inline u8 ufshcd_wb_get_query_index(struct ufs_hba *hba)
return 0;
}
+#ifdef CONFIG_SCSI_UFS_HWMON
+void ufs_hwmon_probe(struct ufs_hba *hba, u8 mask);
+void ufs_hwmon_remove(struct ufs_hba *hba);
+#else
+static inline void ufs_hwmon_probe(struct ufs_hba *hba, u8 mask) {}
+static inline void ufs_hwmon_remove(struct ufs_hba *hba) {}
+#endif
+
#ifdef CONFIG_PM
extern int ufshcd_runtime_suspend(struct device *dev);
extern int ufshcd_runtime_resume(struct device *dev);