Re: [PATCH v2 13/15] hwmon: peci: Add dimmtemp driver

From: Guenter Roeck
Date: Wed Aug 04 2021 - 13:33:43 EST


On 8/4/21 3:46 AM, Winiarska, Iwona wrote:
On Tue, 2021-08-03 at 08:39 -0700, Guenter Roeck wrote:
On Tue, Aug 03, 2021 at 01:31:32PM +0200, Iwona Winiarska wrote:
Add peci-dimmtemp driver for Temperature Sensor on DIMM readings that
are accessible via the processor PECI interface.

The main use case for the driver (and PECI interface) is out-of-band
management, where we're able to obtain thermal readings from an external
entity connected with PECI, e.g. BMC on server platforms.

Co-developed-by: Jae Hyun Yoo <jae.hyun.yoo@xxxxxxxxxxxxxxx>
Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@xxxxxxxxxxxxxxx>
Signed-off-by: Iwona Winiarska <iwona.winiarska@xxxxxxxxx>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@xxxxxxxxxxxxxxx>
---
Note that the timeout was completely removed - we're going to probe
for detected DIMMs every 5 seconds until we reach a "stable" state of
either getting correct DIMM data or getting all -EINVAL (which
suggests that the CPU doesn't have any DIMMs).

 drivers/hwmon/peci/Kconfig    |  13 +
 drivers/hwmon/peci/Makefile   |   2 +
 drivers/hwmon/peci/dimmtemp.c | 614 ++++++++++++++++++++++++++++++++++
 3 files changed, 629 insertions(+)
 create mode 100644 drivers/hwmon/peci/dimmtemp.c

diff --git a/drivers/hwmon/peci/Kconfig b/drivers/hwmon/peci/Kconfig
index e10eed68d70a..9d32a57badfe 100644
--- a/drivers/hwmon/peci/Kconfig
+++ b/drivers/hwmon/peci/Kconfig
@@ -14,5 +14,18 @@ config SENSORS_PECI_CPUTEMP
          This driver can also be built as a module. If so, the module
          will be called peci-cputemp.
+config SENSORS_PECI_DIMMTEMP
+       tristate "PECI DIMM temperature monitoring client"
+       depends on PECI
+       select SENSORS_PECI
+       select PECI_CPU
+       help
+         If you say yes here you get support for the generic Intel PECI
hwmon
+         driver which provides Temperature Sensor on DIMM readings that are
+         accessible via the processor PECI interface.
+
+         This driver can also be built as a module. If so, the module
+         will be called peci-dimmtemp.
+
 config SENSORS_PECI
        tristate
diff --git a/drivers/hwmon/peci/Makefile b/drivers/hwmon/peci/Makefile
index e8a0ada5ab1f..191cfa0227f3 100644
--- a/drivers/hwmon/peci/Makefile
+++ b/drivers/hwmon/peci/Makefile
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 peci-cputemp-y := cputemp.o
+peci-dimmtemp-y := dimmtemp.o
 obj-$(CONFIG_SENSORS_PECI_CPUTEMP)     += peci-cputemp.o
+obj-$(CONFIG_SENSORS_PECI_DIMMTEMP)    += peci-dimmtemp.o
diff --git a/drivers/hwmon/peci/dimmtemp.c b/drivers/hwmon/peci/dimmtemp.c
new file mode 100644
index 000000000000..6264c29bb6c0
--- /dev/null
+++ b/drivers/hwmon/peci/dimmtemp.c
@@ -0,0 +1,614 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (c) 2018-2021 Intel Corporation
+
+#include <linux/auxiliary_bus.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/hwmon.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/peci.h>
+#include <linux/peci-cpu.h>
+#include <linux/units.h>
+#include <linux/workqueue.h>
+#include <linux/x86/intel-family.h>
+
+#include "common.h"
+
+#define DIMM_MASK_CHECK_DELAY_JIFFIES  msecs_to_jiffies(5000)
+
+/* Max number of channel ranks and DIMM index per channel */
+#define CHAN_RANK_MAX_ON_HSX   8
+#define DIMM_IDX_MAX_ON_HSX    3
+#define CHAN_RANK_MAX_ON_BDX   4
+#define DIMM_IDX_MAX_ON_BDX    3
+#define CHAN_RANK_MAX_ON_BDXD  2
+#define DIMM_IDX_MAX_ON_BDXD   2
+#define CHAN_RANK_MAX_ON_SKX   6
+#define DIMM_IDX_MAX_ON_SKX    2
+#define CHAN_RANK_MAX_ON_ICX   8
+#define DIMM_IDX_MAX_ON_ICX    2
+#define CHAN_RANK_MAX_ON_ICXD  4
+#define DIMM_IDX_MAX_ON_ICXD   2
+
+#define CHAN_RANK_MAX          CHAN_RANK_MAX_ON_HSX
+#define DIMM_IDX_MAX           DIMM_IDX_MAX_ON_HSX
+#define DIMM_NUMS_MAX          (CHAN_RANK_MAX * DIMM_IDX_MAX)
+
+#define CPU_SEG_MASK           GENMASK(23, 16)
+#define GET_CPU_SEG(x)         (((x) & CPU_SEG_MASK) >> 16)
+#define CPU_BUS_MASK           GENMASK(7, 0)
+#define GET_CPU_BUS(x)         ((x) & CPU_BUS_MASK)
+
+#define DIMM_TEMP_MAX          GENMASK(15, 8)
+#define DIMM_TEMP_CRIT         GENMASK(23, 16)
+#define GET_TEMP_MAX(x)                (((x) & DIMM_TEMP_MAX) >> 8)
+#define GET_TEMP_CRIT(x)       (((x) & DIMM_TEMP_CRIT) >> 16)
+
+struct peci_dimmtemp;
+
+struct dimm_info {
+       int chan_rank_max;
+       int dimm_idx_max;
+       u8 min_peci_revision;
+       int (*read_thresholds)(struct peci_dimmtemp *priv, int dimm_order,
+                              int chan_rank, u32 *data);
+};
+
+struct peci_dimm_thresholds {
+       long temp_max;
+       long temp_crit;
+       struct peci_sensor_state state;
+};
+
+enum peci_dimm_threshold_type {
+       temp_max_type,
+       temp_crit_type,
+};
+
+struct peci_dimmtemp {
+       struct peci_device *peci_dev;
+       struct device *dev;
+       const char *name;
+       const struct dimm_info *gen_info;
+       struct delayed_work detect_work;
+       struct {
+               struct peci_sensor_data temp;
+               struct peci_dimm_thresholds thresholds;
+       } dimm[DIMM_NUMS_MAX];
+       char **dimmtemp_label;
+       DECLARE_BITMAP(dimm_mask, DIMM_NUMS_MAX);
+};
+
+static u8 __dimm_temp(u32 reg, int dimm_order)
+{
+       return (reg >> (dimm_order * 8)) & 0xff;
+}
+
+static int get_dimm_temp(struct peci_dimmtemp *priv, int dimm_no, long
*val)
+{
+       int dimm_order = dimm_no % priv->gen_info->dimm_idx_max;
+       int chan_rank = dimm_no / priv->gen_info->dimm_idx_max;
+       u32 data;
+       int ret;

        int ret = 0;

+
+       mutex_lock(&priv->dimm[dimm_no].temp.state.lock);
+       if (!peci_sensor_need_update(&priv->dimm[dimm_no].temp.state))
+               goto skip_update;
+
+       ret = peci_pcs_read(priv->peci_dev, PECI_PCS_DDR_DIMM_TEMP,
chan_rank, &data);
+       if (ret) {
+               mutex_unlock(&priv->dimm[dimm_no].temp.state.lock);
+               return ret;
+       }

        if (ret)
                goto unlock;

+
+       priv->dimm[dimm_no].temp.value = __dimm_temp(data, dimm_order) *
MILLIDEGREE_PER_DEGREE;
+
+       peci_sensor_mark_updated(&priv->dimm[dimm_no].temp.state);
+
+skip_update:
+       *val = priv->dimm[dimm_no].temp.value;

unlock:
+       mutex_unlock(&priv->dimm[dimm_no].temp.state.lock);
+       return 0;

        return ret;

Ack.


+}
+
+static int update_thresholds(struct peci_dimmtemp *priv, int dimm_no)
+{
+       int dimm_order = dimm_no % priv->gen_info->dimm_idx_max;
+       int chan_rank = dimm_no / priv->gen_info->dimm_idx_max;
+       u32 data;
+       int ret;
+
+       if (!peci_sensor_need_update(&priv->dimm[dimm_no].thresholds.state))
+               return 0;
+
+       ret = priv->gen_info->read_thresholds(priv, dimm_order, chan_rank,
&data);
+       if (ret == -ENODATA) /* Use default or previous value */
+               return 0;
+       if (ret)
+               return ret;
+
+       priv->dimm[dimm_no].thresholds.temp_max = GET_TEMP_MAX(data) *
MILLIDEGREE_PER_DEGREE;
+       priv->dimm[dimm_no].thresholds.temp_crit = GET_TEMP_CRIT(data) *
MILLIDEGREE_PER_DEGREE;
+
+       peci_sensor_mark_updated(&priv->dimm[dimm_no].thresholds.state);
+
+       return 0;
+}
+
+static int get_dimm_thresholds(struct peci_dimmtemp *priv, enum
peci_dimm_threshold_type type,
+                              int dimm_no, long *val)
+{
+       int ret;
+
+       mutex_lock(&priv->dimm[dimm_no].thresholds.state.lock);
+       ret = update_thresholds(priv, dimm_no);
+       if (ret)
+               goto unlock;
+
+       switch (type) {
+       case temp_max_type:
+               *val = priv->dimm[dimm_no].thresholds.temp_max;
+               break;
+       case temp_crit_type:
+               *val = priv->dimm[dimm_no].thresholds.temp_crit;
+               break;
+       default:
+               ret = -EOPNOTSUPP;
+               break;
+       }
+unlock:
+       mutex_unlock(&priv->dimm[dimm_no].thresholds.state.lock);
+
+       return ret;
+}
+
+static int dimmtemp_read_string(struct device *dev,
+                               enum hwmon_sensor_types type,
+                               u32 attr, int channel, const char **str)
+{
+       struct peci_dimmtemp *priv = dev_get_drvdata(dev);
+
+       if (attr != hwmon_temp_label)
+               return -EOPNOTSUPP;
+
+       *str = (const char *)priv->dimmtemp_label[channel];
+
+       return 0;
+}
+
+static int dimmtemp_read(struct device *dev, enum hwmon_sensor_types type,
+                        u32 attr, int channel, long *val)
+{
+       struct peci_dimmtemp *priv = dev_get_drvdata(dev);
+
+       switch (attr) {
+       case hwmon_temp_input:
+               return get_dimm_temp(priv, channel, val);
+       case hwmon_temp_max:
+               return get_dimm_thresholds(priv, temp_max_type, channel,
val);
+       case hwmon_temp_crit:
+               return get_dimm_thresholds(priv, temp_crit_type, channel,
val);
+       default:
+               break;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+static umode_t dimmtemp_is_visible(const void *data, enum
hwmon_sensor_types type,
+                                  u32 attr, int channel)
+{
+       const struct peci_dimmtemp *priv = data;
+
+       if (test_bit(channel, priv->dimm_mask))
+               return 0444;
+
+       return 0;
+}
+
+static const struct hwmon_ops peci_dimmtemp_ops = {
+       .is_visible = dimmtemp_is_visible,
+       .read_string = dimmtemp_read_string,
+       .read = dimmtemp_read,
+};
+
+static int check_populated_dimms(struct peci_dimmtemp *priv)
+{
+       int chan_rank_max = priv->gen_info->chan_rank_max;
+       int dimm_idx_max = priv->gen_info->dimm_idx_max;
+       u32 chan_rank_empty = 0;
+       u64 dimm_mask = 0;
+       int chan_rank, dimm_idx, ret;
+       u32 pcs;
+
+       BUILD_BUG_ON(CHAN_RANK_MAX > 32);
+       BUILD_BUG_ON(DIMM_NUMS_MAX > 64);

I don't immediately see the value of those build bugs. What happens if
CHAN_RANK_MAX > 32 or DIMM_NUMS_MAX > 64? Where do those limits come
from?

Supported HW doesn't come near the limit for now - it's just an "artificial"
limit imposed by the variables we're using (u64 for dimm_mask and u32 for
chan_rank_empty).


Please use a value derived from the size of those variables for the check
to clarify and explain the constraints.

Thanks,
Guenter