Re: [PATCH v2 2/3] thermal: Add support of multi sensors to thermal_core

From: Pin-yen Lin
Date: Thu Apr 11 2024 - 16:47:00 EST


Hi Alexandre,

On Thu, Apr 11, 2024 at 4:34 PM Alexandre Bailon <abailon@xxxxxxxxxxxx> wrote:
>
> This adds support of multi sensors to thermal.
> Currently, this only supports the get_temp operation.
> This returns an average temperature of all the sensors.
> If defined, a coefficient is applied to the value read from the sensor
> before computing the average.
>
> Signed-off-by: Alexandre Bailon <abailon@xxxxxxxxxxxx>
> ---
> drivers/thermal/Makefile | 1 +
> drivers/thermal/thermal_core.h | 7 ++
> drivers/thermal/thermal_multi.c | 178 ++++++++++++++++++++++++++++++++
> 3 files changed, 186 insertions(+)
> create mode 100644 drivers/thermal/thermal_multi.c
>
> diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
> index c934cab309ae..757289a406f7 100644
> --- a/drivers/thermal/Makefile
> +++ b/drivers/thermal/Makefile
> @@ -6,6 +6,7 @@ CFLAGS_thermal_core.o := -I$(src)
> obj-$(CONFIG_THERMAL) += thermal_sys.o
> thermal_sys-y += thermal_core.o thermal_sysfs.o
> thermal_sys-y += thermal_trip.o thermal_helpers.o
> +thermal_sys-y += thermal_multi.o
>
> # netlink interface to manage the thermal framework
> thermal_sys-$(CONFIG_THERMAL_NETLINK) += thermal_netlink.o
> diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
> index 0a3b3ec5120b..26e83a5c8298 100644
> --- a/drivers/thermal/thermal_core.h
> +++ b/drivers/thermal/thermal_core.h
> @@ -138,6 +138,13 @@ ssize_t weight_show(struct device *, struct device_attribute *, char *);
> ssize_t weight_store(struct device *, struct device_attribute *, const char *,
> size_t);
>
> +/* Multi sensors */
> +int thermal_multi_sensor_validate_coeff(int *coeff, int count, int offset);
> +int thermal_multi_sensor_register(const char *name,
> + struct thermal_zone_device *sensor_tz, int coeff);
> +void thermal_multi_sensor_unregister(struct thermal_zone_device *sensor_tz);
> +
> +
> #ifdef CONFIG_THERMAL_STATISTICS
> void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
> unsigned long new_state);
> diff --git a/drivers/thermal/thermal_multi.c b/drivers/thermal/thermal_multi.c
> new file mode 100644
> index 000000000000..a5a4f1f2d594
> --- /dev/null
> +++ b/drivers/thermal/thermal_multi.c
> @@ -0,0 +1,178 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include <linux/err.h>
> +#include <linux/export.h>
> +#include <linux/of.h>
> +#include <linux/slab.h>
> +#include <linux/thermal.h>
> +#include <linux/types.h>
> +#include <linux/string.h>
> +
> +#include "thermal_core.h"
> +
> +struct sensor_interface {
> + struct thermal_zone_device *tz;
> + int coeff;
> +
> + struct list_head node;
> +};
> +
> +struct multi_sensor_thermal_zone {
> + struct thermal_zone_device *tz;
> + struct mutex sensors_lock;
> + struct list_head sensors;
> +
> + struct list_head node;
> +};
> +
> +static DEFINE_MUTEX(multi_tz_mutex);
> +static LIST_HEAD(multi_tz_list);
> +
> +#define TJ_MAX 120000
> +
> +static int multi_sensor_get_temp(struct thermal_zone_device *tz, int *temp)
> +{
> + struct multi_sensor_thermal_zone *multi_tz = tz->devdata;
> + struct sensor_interface *sensor;
> + int accumulated_temp = 0;
> + u32 accumulated_coeff;

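Should we initialize accumulated_coeff to 0 as well? It is only ever
updated with "+=" in the loop below and is then used as the divisor, so
as written it starts from an uninitialized value.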

> + int ret;
> +
> + mutex_lock(&multi_tz->sensors_lock);
> +
> + if (list_empty(&multi_tz->sensors)) {
> + mutex_unlock(&multi_tz->sensors_lock);
> + return -ENODEV;
> + }
> +
> + list_for_each_entry(sensor, &multi_tz->sensors, node) {
> + ret = thermal_zone_get_temp(sensor->tz, temp);
> + if (ret) {
> + mutex_unlock(&multi_tz->sensors_lock);
> + return ret;
> + }
> +
> + accumulated_temp += *temp * sensor->coeff;
> + accumulated_coeff += sensor->coeff;
> + }
> +
> + mutex_unlock(&multi_tz->sensors_lock);
> +
> + *temp = accumulated_temp / accumulated_coeff;
> + return ret;
> +}
> +
> +struct thermal_zone_device_ops multi_sensor_ops = {
> + .get_temp = multi_sensor_get_temp,
> +};
> +
> +int thermal_multi_sensor_validate_coeff(int *coeff, int count, int offset)
> +{
> + int max_accumulated_temp = 0;
> + int i;
> +
> + for (i = 0; i < count; i++) {
> + max_accumulated_temp += TJ_MAX * coeff[i];
> + if (max_accumulated_temp < 0)
> + return -EOVERFLOW;
> + }
> +
> + max_accumulated_temp += offset;
> + return max_accumulated_temp < 0 ? -EOVERFLOW : 0;
> +}
> +
> +static struct thermal_zone_device *multi_sensor_tz_alloc(const char *name)
> +{
> + struct thermal_zone_device *tz;
> + struct thermal_zone_params tzp = {};
> + struct multi_sensor_thermal_zone *multi_tz;
> +
> + tz = thermal_zone_get_zone_by_name(name);
> + if (!IS_ERR(tz)) {
> + mutex_unlock(&multi_tz_mutex);
> + return tz;
> + }
> +
> + multi_tz = kzalloc(sizeof(*multi_tz), GFP_KERNEL);
> + if (!multi_tz)
> + return ERR_PTR(-ENOMEM);
> + mutex_init(&multi_tz->sensors_lock);
> + INIT_LIST_HEAD(&multi_tz->sensors);
> +
> + tzp.no_hwmon = true;
> + tzp.slope = 1;
> + tzp.offset = 0;
> +
> + tz = thermal_tripless_zone_device_register(name, multi_tz,
> + &multi_sensor_ops, &tzp);
> + if (IS_ERR(tz)) {
> + kfree(multi_tz);
> + } else {
> + multi_tz->tz = tz;
> + list_add(&multi_tz->node, &multi_tz_list);
> + }
> +
> + return tz;
> +}
> +
> +int thermal_multi_sensor_register(const char *name,
> + struct thermal_zone_device *sensor_tz, int coeff)
> +{
> + struct thermal_zone_device *tz;
> + struct multi_sensor_thermal_zone *multi_tz;
> + struct sensor_interface *sensor;
> +
> + mutex_lock(&multi_tz_mutex);
> +
> + tz = multi_sensor_tz_alloc(name);
> + if (IS_ERR(tz)) {
> + mutex_unlock(&multi_tz_mutex);
> + return PTR_ERR(tz);
> + }
> + multi_tz = tz->devdata;
> +
> + sensor = kzalloc(sizeof(*sensor), GFP_KERNEL);
> + if (!sensor) {
> + mutex_unlock(&multi_tz_mutex);
> + return -ENOMEM;
> + }
> +
> + sensor->tz = sensor_tz;
> + sensor->coeff = coeff;
> + mutex_lock(&multi_tz->sensors_lock);
> + list_add(&sensor->node, &multi_tz->sensors);
> + mutex_unlock(&multi_tz->sensors_lock);
> +
> + thermal_zone_device_enable(tz);
> +
> + mutex_unlock(&multi_tz_mutex);
> +
> + return 0;
> +}
> +
> +void thermal_multi_sensor_unregister(struct thermal_zone_device *sensor_tz)
> +{
> + struct multi_sensor_thermal_zone *multi_tz;
> + struct sensor_interface *sensor, *tmp;
> +
> + mutex_lock(&multi_tz_mutex);
> + list_for_each_entry(multi_tz, &multi_tz_list, node) {
> + mutex_lock(&multi_tz->sensors_lock);
> + list_for_each_entry_safe(sensor, tmp, &multi_tz->sensors, node) {
> + if (sensor->tz == sensor_tz) {
> + list_del(&sensor->node);
> + kfree(sensor);
> + break;
> + }
> + }
> +
> + if (list_empty(&multi_tz->sensors)) {
> + thermal_zone_device_unregister(multi_tz->tz);
> + mutex_unlock(&multi_tz->sensors_lock);
> + kfree(multi_tz);
> + } else {
> + mutex_unlock(&multi_tz->sensors_lock);
> + }
> + }
> + mutex_unlock(&multi_tz_mutex);
> +}
> --
> 2.41.0
>

By the way, may I know why min/max aggregation was dropped in this
version? I thought that checking the maximum temperature is the most
direct approach to protect the hardware and the users from high
temperatures.

Best regards,
Pin-yen