[PATCH 1/2] habanalabs: add support for getting device total energy

From: Oded Gabbay
Date: Sat Aug 22 2020 - 12:23:44 EST


From: farah kassabri <fkassabri@xxxxxxxxx>

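Add driver implementation for reading the total energy consumption
from the device ARM FW, and expose it to user-space through a new
HL_INFO_TOTAL_ENERGY opcode of the INFO IOCTL (struct hl_info_energy).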

Signed-off-by: farah kassabri <fkassabri@xxxxxxxxx>
Reviewed-by: Oded Gabbay <oded.gabbay@xxxxxxxxx>
Signed-off-by: Oded Gabbay <oded.gabbay@xxxxxxxxx>
---
drivers/misc/habanalabs/common/firmware_if.c | 24 +++++++++++++++++++
drivers/misc/habanalabs/common/habanalabs.h | 2 ++
.../misc/habanalabs/common/habanalabs_ioctl.c | 24 +++++++++++++++++++
.../misc/habanalabs/include/common/armcp_if.h | 1 +
include/uapi/misc/habanalabs.h | 10 ++++++++
5 files changed, 61 insertions(+)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 0842c2211475..d15db03c79a4 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -402,6 +402,30 @@ int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
return rc;
}

+int hl_fw_armcp_total_energy_get(struct hl_device *hdev, /* sends a TOTAL_ENERGY_GET packet via send_cpu_message() */
+ u64 *total_energy) /* out: written only on success (rc == 0) */
+{
+ struct armcp_packet pkt = {}; /* zero-initialized; only ctl is set below */
+ long result; /* filled in by send_cpu_message() */
+ int rc;
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_TOTAL_ENERGY_GET << /* opcode placed in the ctl word */
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+ if (rc) { /* failure: log and propagate; *total_energy is left untouched */
+ dev_err(hdev->dev,
+ "Failed to handle ArmCP total energy pkt, error %d\n",
+ rc);
+ return rc;
+ }
+
+ *total_energy = result; /* NOTE(review): long -> u64; a negative result would sign-extend - confirm FW never returns one */
+
+ return rc; /* rc is 0 on this path */
+}
+
static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
{
u32 err_val;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 0a422fd742e9..f912e344ed33 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1849,6 +1849,8 @@ int hl_fw_armcp_info_get(struct hl_device *hdev);
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
struct hl_info_pci_counters *counters);
+int hl_fw_armcp_total_energy_get(struct hl_device *hdev,
+ u64 *total_energy);
int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
u32 boot_err0_reg, bool skip_bmc,
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index a94800014243..18ee14b4b0e1 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -357,6 +357,27 @@ static int sync_manager_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
sizeof(sm_info))) ? -EFAULT : 0;
}

+static int total_energy_consumption_info(struct hl_fpriv *hpriv, /* handler for the HL_INFO_TOTAL_ENERGY request */
+ struct hl_info_args *args)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_info_energy total_energy = {0}; /* zeroed before it is copied out */
+ u32 max_size = args->return_size; /* used below as the upper bound on bytes copied */
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+ int rc;
+
+ if ((!max_size) || (!out)) /* reject a zero size or a NULL output pointer */
+ return -EINVAL;
+
+ rc = hl_fw_armcp_total_energy_get(hdev,
+ &total_energy.total_energy_consumption);
+ if (rc) /* query failed: return its error, copy nothing to the user */
+ return rc;
+
+ return copy_to_user(out, &total_energy, /* copies min(max_size, sizeof(total_energy)) bytes */
+ min((size_t) max_size, sizeof(total_energy))) ? -EFAULT : 0;
+}
+
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev)
{
@@ -429,6 +450,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_SYNC_MANAGER:
return sync_manager_info(hpriv, args);

+ case HL_INFO_TOTAL_ENERGY:
+ return total_energy_consumption_info(hpriv, args);
+
default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -ENOTTY;
diff --git a/drivers/misc/habanalabs/include/common/armcp_if.h b/drivers/misc/habanalabs/include/common/armcp_if.h
index 1403c937253c..4d78898524e9 100644
--- a/drivers/misc/habanalabs/include/common/armcp_if.h
+++ b/drivers/misc/habanalabs/include/common/armcp_if.h
@@ -245,6 +245,7 @@ enum armcp_packet_id {
ARMCP_PACKET_CURRENT_SET, /* sysfs */
ARMCP_PACKET_PCIE_THROUGHPUT_GET, /* internal */
ARMCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */
+ ARMCP_PACKET_TOTAL_ENERGY_GET, /* internal */
};

#define ARMCP_PACKET_FENCE_VAL 0xFE8CE7A5
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 693081728ef3..6803991726e8 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -267,6 +267,7 @@ enum hl_device_status {
* HL_INFO_PCI_COUNTERS - Retrieve PCI counters
* HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason
* HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore
+ * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption
*/
#define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1
@@ -282,6 +283,7 @@ enum hl_device_status {
#define HL_INFO_PCI_COUNTERS 12
#define HL_INFO_CLK_THROTTLE_REASON 13
#define HL_INFO_SYNC_MANAGER 14
+#define HL_INFO_TOTAL_ENERGY 15

#define HL_INFO_VERSION_MAX_LEN 128
#define HL_INFO_CARD_NAME_MAX_LEN 16
@@ -375,6 +377,14 @@ struct hl_info_clk_throttle {
__u32 clk_throttling_reason;
};

+/**
+ * struct hl_info_energy - device energy information (HL_INFO_TOTAL_ENERGY)
+ * @total_energy_consumption: total device energy consumption, read from FW
+ */
+struct hl_info_energy {
+ __u64 total_energy_consumption;
+};
+
/**
* struct hl_info_sync_manager - sync manager information
* @first_available_sync_object: first available sob
--
2.17.1