Re: [PATCH v2 4/4] thermal: mediatek: add another get_temp ops for thermal sensors

From: Amjad Ouled-Ameur
Date: Fri Aug 26 2022 - 10:12:33 EST


Hi Christophe,

On 7/20/22 20:54, Christophe JAILLET wrote:
Le 20/07/2022 à 20:18, Amjad Ouled-Ameur a écrit :
Provide thermal zones to read the thermal sensors in the SoC. We can read all the
thermal sensor values in the SoC through the node /sys/class/thermal/

In mtk_thermal_bank_temperature, return -EAGAIN instead of -EACCES
on the first read of a sensor, which often yields bogus values.
This can avoid following warning on boot:

   thermal thermal_zone6: failed to read out thermal zone (-13)

Signed-off-by: Michael Kao <michael.kao-NuS5LvNUpcJWk0Htik3J/w@xxxxxxxxxxxxxxxx>
Signed-off-by: Hsin-Yi Wang <hsinyi-F7+t8E8rja9g9hUCZPvPmw@xxxxxxxxxxxxxxxx>
Signed-off-by: Amjad Ouled-Ameur <aouledameur-rdvid1DuHRBWk0Htik3J/w@xxxxxxxxxxxxxxxx>
Tested-by: Amjad Ouled-Ameur <aouledameur-rdvid1DuHRBWk0Htik3J/w@xxxxxxxxxxxxxxxx>
---
  drivers/thermal/mtk_thermal.c | 100 ++++++++++++++++++++++++++--------
  1 file changed, 76 insertions(+), 24 deletions(-)

diff --git a/drivers/thermal/mtk_thermal.c b/drivers/thermal/mtk_thermal.c
index 1dc276f8c4f1..79b14ce1a08d 100644
--- a/drivers/thermal/mtk_thermal.c
+++ b/drivers/thermal/mtk_thermal.c
@@ -259,6 +259,11 @@ enum mtk_thermal_version {
    struct mtk_thermal;
  +struct mtk_thermal_zone {
+    struct mtk_thermal *mt;
+    int id;
+};
+
  struct thermal_bank_cfg {
      unsigned int num_sensors;
      const int *sensors;
@@ -709,6 +714,32 @@ static void mtk_thermal_put_bank(struct mtk_thermal_bank *bank)
          mutex_unlock(&mt->lock);
  }
  +static u32 _get_sensor_temp(struct mtk_thermal *mt, int id)
+{
+    u32 raw;
+    int temp;
+
+    const struct mtk_thermal_data *conf = mt->conf;
+
+    raw = readl(mt->thermal_base + conf->msr[id]);
+
+    if (mt->conf->version == MTK_THERMAL_V1)
+        temp = raw_to_mcelsius_v1(mt, id, raw);
+    else
+        temp = raw_to_mcelsius_v2(mt, id, raw);
+
+    /*
+     * The first read of a sensor often contains very high bogus
+     * temperature value. Filter these out so that the system does
+     * not immediately shut down.
+     */
+
+    if (temp > 200000)
+        return  -EAGAIN;

This function returns a u32. Is it ok to return -EAGAIN?

_get_sensor_temp() should return int instead of u32; will fix it in V3.
There is also 2 spaces here...

+    else
+        return    temp;

... and a tab here.

will fix them in V3.
+}
+
  /**
   * mtk_thermal_bank_temperature - get the temperature of a bank
   * @bank:    The bank
@@ -721,26 +752,9 @@ static int mtk_thermal_bank_temperature(struct mtk_thermal_bank *bank)
      struct mtk_thermal *mt = bank->mt;
      const struct mtk_thermal_data *conf = mt->conf;
      int i, temp = INT_MIN, max = INT_MIN;
-    u32 raw;
        for (i = 0; i < conf->bank_data[bank->id].num_sensors; i++) {
-        raw = readl(mt->thermal_base + conf->msr[i]);
-
-        if (mt->conf->version == MTK_THERMAL_V1) {
-            temp = raw_to_mcelsius_v1(
-                mt, conf->bank_data[bank->id].sensors[i], raw);
-        } else {
-            temp = raw_to_mcelsius_v2(
-                mt, conf->bank_data[bank->id].sensors[i], raw);
-        }
-
-        /*
-         * The first read of a sensor often contains very high bogus
-         * temperature value. Filter these out so that the system does
-         * not immediately shut down.
-         */
-        if (temp > 200000)
-            temp = 0;
+        temp = _get_sensor_temp(mt, i);

Is it ok if _get_sensor_temp() returns -EAGAIN?

drivers/thermal/thermal_core.c:update_temperature() checks for -EAGAIN after
thermal_zone_get_temp() is called; thus, I think it's good to return -EAGAIN in
case of failure.

            if (temp > max)
              max = temp;
@@ -751,7 +765,8 @@ static int mtk_thermal_bank_temperature(struct mtk_thermal_bank *bank)
    static int mtk_read_temp(void *data, int *temperature)
  {
-    struct mtk_thermal *mt = data;
+    struct mtk_thermal_zone *tz = data;
+    struct mtk_thermal *mt = tz->mt;
      int i;
      int tempmax = INT_MIN;
  @@ -770,10 +785,28 @@ static int mtk_read_temp(void *data, int *temperature)
      return 0;
  }
  +static int mtk_read_sensor_temp(void *data, int *temperature)
+{
+    struct mtk_thermal_zone *tz = data;
+    struct mtk_thermal *mt = tz->mt;
+    int id = tz->id - 1;
+
+    if (id < 0)
+        return  -EACCES;

2 spaces.


will fix it in V3.
+
+    *temperature = _get_sensor_temp(mt, id);

If _get_sensor_temp() returns -EAGAIN, should this be propagated to the caller?

+
+    return 0;
+}
+
  static const struct thermal_zone_of_device_ops mtk_thermal_ops = {
      .get_temp = mtk_read_temp,
  };
  +static const struct thermal_zone_of_device_ops mtk_thermal_sensor_ops = {
+    .get_temp = mtk_read_sensor_temp,
+};
+
  static void mtk_thermal_init_bank(struct mtk_thermal *mt, int num,
                    u32 apmixed_phys_base, u32 auxadc_phys_base,
                    int ctrl_id)
@@ -1072,6 +1105,7 @@ static int mtk_thermal_probe(struct platform_device *pdev)
      u64 auxadc_phys_base, apmixed_phys_base;
      struct thermal_zone_device *tzdev;
      void __iomem *apmixed_base, *auxadc_base;
+    struct mtk_thermal_zone *tz;
        mt = devm_kzalloc(&pdev->dev, sizeof(*mt), GFP_KERNEL);
      if (!mt)
@@ -1161,11 +1195,29 @@ static int mtk_thermal_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, mt);
  -    tzdev = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, mt,
-                             &mtk_thermal_ops);
-    if (IS_ERR(tzdev)) {
-        ret = PTR_ERR(tzdev);
-        goto err_disable_clk_peri_therm;
+    for (i = 0; i < mt->conf->num_sensors + 1; i++) {
+        tz = kmalloc(sizeof(*tz), GFP_KERNEL);

Should this memory allocation be a devm_kmalloc(), or is this memory freed at some point by the framework?

(I don't know the thermal_zone API and the patch has no kfree())
AFAIK, thermal API does not free private data, therefore devm_kmalloc() should be used.

CJ

+        if (!tz)
+            return -ENOMEM;
+
+        tz->mt = mt;
+        tz->id = i;
+
+        tzdev = devm_thermal_zone_of_sensor_register(&pdev->dev, i, tz, (i == 0) ?
+                                 &mtk_thermal_ops :
+                                 &mtk_thermal_sensor_ops);
+
+        if (IS_ERR(tzdev)) {
+            if (PTR_ERR(tzdev) == -ENODEV) {
+                dev_warn(&pdev->dev,
+                     "sensor %d not registered in thermal zone in dt\n", i);
+                continue;
+            }
+            if (PTR_ERR(tzdev) == -EACCES) {
+                ret = PTR_ERR(tzdev);
+                goto err_disable_clk_peri_therm;
+            }
+        }
      }
        ret = devm_thermal_add_hwmon_sysfs(tzdev);

Thank you Christophe for the review.

Regards,

Amjad