[PATCH V1 8/9] accel: amd-ai-engine: Create tile memory information
From: Gregory Williams
Date: Wed Jul 02 2025 - 11:59:42 EST
Create a tile memory information structure that stores the sizes and offsets
of core data memory and program memory, plus memory tile memory for AIEML.
Signed-off-by: Gregory Williams <gregory.williams@xxxxxxx>
---
drivers/accel/amd-ai-engine/ai-engine-aie.c | 39 +++++++++
drivers/accel/amd-ai-engine/ai-engine-aieml.c | 47 ++++++++++
.../accel/amd-ai-engine/ai-engine-internal.h | 85 +++++++++++++------
drivers/accel/amd-ai-engine/ai-engine-part.c | 45 ++++++++++
4 files changed, 192 insertions(+), 24 deletions(-)
diff --git a/drivers/accel/amd-ai-engine/ai-engine-aie.c b/drivers/accel/amd-ai-engine/ai-engine-aie.c
index 5e3cb44a16c8..056db0b7be0e 100644
--- a/drivers/accel/amd-ai-engine/ai-engine-aie.c
+++ b/drivers/accel/amd-ai-engine/ai-engine-aie.c
@@ -16,6 +16,8 @@
#define AIE_COL_SHIFT 23U
#define AIE_ROW_SHIFT 18U
+/* Memory types reported by aie_get_mem_info(): tile data and program memory */
+#define NUM_TYPES_OF_MEM 2U
+
/*
* Register offsets
*/
@@ -41,6 +43,42 @@ static u32 aie_get_tile_type(struct aie_device *adev, struct aie_location *loc)
return AIE_TILE_TYPE_SHIMNOC;
}
+/**
+ * aie_get_mem_info() - get the memory types information of AIE tiles
+ * @adev: AI engine device
+ * @range: range of tiles to describe
+ * @pmem: array of NUM_TYPES_OF_MEM entries to fill, or NULL to only query
+ *	  how many entries are needed
+ *
+ * Entry 0 describes tile data memory and entry 1 describes program memory.
+ * Each entry takes its columns from @range and its rows from the
+ * AIE_TILE_TYPE_TILE attributes of @adev.
+ *
+ * Return: 0 if @range does not extend past row 0 (SHIM row only), otherwise
+ *	   NUM_TYPES_OF_MEM.
+ */
+static unsigned int aie_get_mem_info(struct aie_device *adev,
+				     struct aie_range *range,
+				     struct aie_part_mem *pmem)
+{
+	u8 first_row, row_count;
+	unsigned int idx;
+
+	/* SHIM row only, no memories in this range */
+	if (range->start.row + range->size.row <= 1)
+		return 0;
+
+	/* Without an array the caller only asks for the entry count */
+	if (!pmem)
+		return NUM_TYPES_OF_MEM;
+
+	first_row = adev->ttype_attr[AIE_TILE_TYPE_TILE].start_row;
+	row_count = adev->ttype_attr[AIE_TILE_TYPE_TILE].num_rows;
+
+	/* Both memory types live in the same tiles, so they share one range */
+	for (idx = 0; idx < NUM_TYPES_OF_MEM; idx++) {
+		struct aie_mem *mem = &pmem[idx].mem;
+
+		mem->range = *range;
+		mem->range.start.row = first_row;
+		mem->range.size.row = row_count;
+	}
+
+	/* Tile data memory */
+	pmem[0].mem.offset = 0;
+	pmem[0].mem.size = KBYTES(32);
+
+	/* Program memory */
+	pmem[1].mem.offset = 0x20000;
+	pmem[1].mem.size = KBYTES(16);
+
+	return NUM_TYPES_OF_MEM;
+}
+
/* aie_scan_part_clocks() - scan clocks of a partition
* @apart: AI engine partition
*
@@ -258,6 +296,7 @@ static int aie_set_part_clocks(struct aie_partition *apart)
}
static const struct aie_tile_operations aie_ops = {
.get_tile_type = aie_get_tile_type,
+ .get_mem_info = aie_get_mem_info,
.scan_part_clocks = aie_scan_part_clocks,
.set_part_clocks = aie_set_part_clocks,
};
diff --git a/drivers/accel/amd-ai-engine/ai-engine-aieml.c b/drivers/accel/amd-ai-engine/ai-engine-aieml.c
index 328688942a6a..7730609ff7c0 100644
--- a/drivers/accel/amd-ai-engine/ai-engine-aieml.c
+++ b/drivers/accel/amd-ai-engine/ai-engine-aieml.c
@@ -50,6 +50,52 @@ static u32 aieml_get_tile_type(struct aie_device *adev,
return AIE_TILE_TYPE_SHIMNOC;
}
+/*
+ * Number of memory types on AIEML: tile data memory, tile program memory
+ * and memory tile memory.
+ */
+#define AIEML_NUM_TYPES_OF_MEM	3U
+
+/**
+ * aieml_get_mem_info() - get the memory types information of AIEML tiles
+ * @adev: AI engine device
+ * @range: range of tiles to describe
+ * @pmem: array of at least AIEML_NUM_TYPES_OF_MEM entries to fill, or NULL
+ *	  to only query how many entries are needed
+ *
+ * Entry 0 describes tile data memory, entry 1 tile program memory and
+ * entry 2 memory tile memory. Each entry takes its columns from @range and
+ * its rows from the matching tile type attributes of @adev.
+ *
+ * Return: 0 if @range does not extend past row 0 (SHIM row only), otherwise
+ *	   AIEML_NUM_TYPES_OF_MEM.
+ */
+static unsigned int aieml_get_mem_info(struct aie_device *adev,
+				       struct aie_range *range,
+				       struct aie_part_mem *pmem)
+{
+	u8 start_row, num_rows;
+	unsigned int i;
+
+	if (range->start.row + range->size.row <= 1) {
+		/* SHIM row only, no memories in this range */
+		return 0;
+	}
+
+	/*
+	 * The count reported here sizes the caller's array, so it must cover
+	 * every entry written below, including the memory tile entry.
+	 */
+	if (!pmem)
+		return AIEML_NUM_TYPES_OF_MEM;
+
+	/* All three entries inherit the columns of @range */
+	for (i = 0; i < AIEML_NUM_TYPES_OF_MEM; i++)
+		pmem[i].mem.range = *range;
+
+	start_row = adev->ttype_attr[AIE_TILE_TYPE_TILE].start_row;
+	num_rows = adev->ttype_attr[AIE_TILE_TYPE_TILE].num_rows;
+	/* Setup tile data memory information */
+	pmem[0].mem.offset = 0;
+	pmem[0].mem.size = KBYTES(64);
+	pmem[0].mem.range.start.row = start_row;
+	pmem[0].mem.range.size.row = num_rows;
+
+	/* Setup program memory information */
+	pmem[1].mem.offset = 0x20000;
+	pmem[1].mem.size = KBYTES(16);
+	pmem[1].mem.range.start.row = start_row;
+	pmem[1].mem.range.size.row = num_rows;
+
+	start_row = adev->ttype_attr[AIE_TILE_TYPE_MEMORY].start_row;
+	num_rows = adev->ttype_attr[AIE_TILE_TYPE_MEMORY].num_rows;
+	/* Setup memory tile memory information */
+	pmem[2].mem.offset = 0;
+	pmem[2].mem.size = KBYTES(512);
+	pmem[2].mem.range.start.row = start_row;
+	pmem[2].mem.range.size.row = num_rows;
+
+	return AIEML_NUM_TYPES_OF_MEM;
+}
+
/* aieml_scan_part_clocks() - scan clocks of a partition
* @apart: AI engine partition
*
@@ -188,6 +234,7 @@ static int aieml_set_part_clocks(struct aie_partition *apart)
static const struct aie_tile_operations aieml_ops = {
.get_tile_type = aieml_get_tile_type,
+ .get_mem_info = aieml_get_mem_info,
.scan_part_clocks = aieml_scan_part_clocks,
.set_part_clocks = aieml_set_part_clocks,
};
diff --git a/drivers/accel/amd-ai-engine/ai-engine-internal.h b/drivers/accel/amd-ai-engine/ai-engine-internal.h
index 31a45575cc43..13a39c4e3331 100644
--- a/drivers/accel/amd-ai-engine/ai-engine-internal.h
+++ b/drivers/accel/amd-ai-engine/ai-engine-internal.h
@@ -68,30 +68,6 @@ struct aie_device;
struct aie_partition;
struct aie_aperture;
-/**
- * struct aie_tile_operations - AI engine device operations
- * @get_tile_type: get type of tile based on tile operation
- * @scan_part_clocks: scan partition modules to check whether the modules are
- * clock gated or not, and update the soft clock states
- * structure. It is required to be called when the partition
- * is requested so that the driver knows which modules are
- * clock gated when the partition is requested. This function
- * expects the caller to apply partition lock before calling
- * this function.
- * @set_part_clocks: set partition modules clocks gate registers based on the
- * partition clock states bitmap. This function expects the
- * caller to apply partition lock before calling this
- * function. The caller function will need to set the bitmap
- * on which tiles are required to be clocked on.
- * Different AI engine device version has its own device
- * operation.
- */
-struct aie_tile_operations {
- u32 (*get_tile_type)(struct aie_device *adev, struct aie_location *loc);
- int (*scan_part_clocks)(struct aie_partition *apart);
- int (*set_part_clocks)(struct aie_partition *apart);
-};
-
/**
* struct aie_resource - AI engine resource structure
* @bitmap: resource bitmap
@@ -112,6 +88,37 @@ struct aie_range {
struct aie_location size;
};
+/**
+ * struct aie_mem - AIE memory information
+ * @range: range of tiles of the memory
+ * @offset: register offset within a tile of the memory
+ * @size: size of the memory in one tile
+ */
+struct aie_mem {
+ struct aie_range range;
+ __kernel_size_t offset;
+ __kernel_size_t size;
+};
+
+/**
+ * struct aie_part_mem - AI engine partition memory information structure
+ * @apart: AI engine partition
+ * @mem: memory information of a type of memory
+ * @size: total size of this type of memory in the partition, i.e. the
+ * per-tile size multiplied by the number of tiles in the range of @mem
+ *
+ * This structure keeps the information of one type of memory in a
+ * partition. The memory information is stored in the @mem member.
+ * The following information is kept:
+ * * memory start address offset within a tile
+ * * memory size
+ * * what tiles contain this type of memory
+ */
+struct aie_part_mem {
+ struct aie_partition *apart;
+ struct aie_mem mem;
+ size_t size;
+};
+
/**
* struct aie_tile_attr - AI engine device tile type attributes
* @start_row: start row
@@ -126,6 +133,34 @@ struct aie_tile_attr {
const enum aie_module_type *mods;
};
+/**
+ * struct aie_tile_operations - AI engine device operations
+ * @get_tile_type: get type of tile based on tile operation
+ * @get_mem_info: get different types of memories information. It returns
+ * the number of memory types for the given range, or 0 if
+ * the range contains no memories. When called with a NULL
+ * array it only reports that number, so the caller can size
+ * the array; otherwise it also fills one array entry per
+ * memory type.
+ * @scan_part_clocks: scan partition modules to check whether the modules are
+ * clock gated or not, and update the soft clock states
+ * structure. It is required to be called when the partition
+ * is requested so that the driver knows which modules are
+ * clock gated when the partition is requested. This function
+ * expects the caller to apply partition lock before calling
+ * this function.
+ * @set_part_clocks: set partition modules clocks gate registers based on the
+ * partition clock states bitmap. This function expects the
+ * caller to apply partition lock before calling this
+ * function. The caller function will need to set the bitmap
+ * on which tiles are required to be clocked on.
+ * Different AI engine device version has its own device
+ * operation.
+ */
+struct aie_tile_operations {
+ u32 (*get_tile_type)(struct aie_device *adev, struct aie_location *loc);
+ unsigned int (*get_mem_info)(struct aie_device *adev,
+ struct aie_range *range,
+ struct aie_part_mem *pmem);
+ int (*scan_part_clocks)(struct aie_partition *apart);
+ int (*set_part_clocks)(struct aie_partition *apart);
+};
+
/**
* struct aie_device - AI engine device structure
* @apertures: list of apertures
@@ -188,6 +223,7 @@ struct aie_aperture {
* @range: range of partition
* @cores_clk_state: bitmap to indicate the power state of core and mem tiles
* @tiles_inuse: bitmap to indicate if a tile is in use
+ * @pmems: pointer to partition memories types
* @mlock: protection for AI engine partition operations
* @freq_req: required frequency
*/
@@ -198,6 +234,7 @@ struct aie_partition {
struct aie_range range;
struct aie_resource cores_clk_state;
struct aie_resource tiles_inuse;
+ struct aie_part_mem *pmems;
struct mutex mlock; /* protection for AI engine partition operations */
u64 freq_req;
};
diff --git a/drivers/accel/amd-ai-engine/ai-engine-part.c b/drivers/accel/amd-ai-engine/ai-engine-part.c
index 83099cb60161..878597eff202 100644
--- a/drivers/accel/amd-ai-engine/ai-engine-part.c
+++ b/drivers/accel/amd-ai-engine/ai-engine-part.c
@@ -12,6 +12,44 @@
#include "ai-engine-internal.h"
+/**
+ * aie_part_create_mems_info() - set up the memory types information of an
+ *				  AI engine partition
+ * @apart: AI engine partition
+ *
+ * Asks the device operations how many memory types the partition range
+ * contains, allocates a device-managed array with one entry per type in
+ * @apart->pmems, has the device operations fill it, and then records the
+ * owning partition and the total size of each memory type.
+ *
+ * Return: 0 for success (including when the range has no memories),
+ *	   -ENOMEM if the array cannot be allocated
+ */
+static int aie_part_create_mems_info(struct aie_partition *apart)
+{
+	struct aie_device *adev = apart->adev;
+	struct aie_part_mem *pmem;
+	unsigned int count, idx;
+
+	/* A NULL array only queries the number of memory types */
+	count = adev->ops->get_mem_info(adev, &apart->range, NULL);
+	if (count == 0)
+		return 0;
+
+	apart->pmems = devm_kcalloc(apart->aperture->dev, count,
+				    sizeof(*apart->pmems), GFP_KERNEL);
+	if (!apart->pmems)
+		return -ENOMEM;
+
+	adev->ops->get_mem_info(adev, &apart->range, apart->pmems);
+
+	for (idx = 0, pmem = apart->pmems; idx < count; idx++, pmem++) {
+		pmem->apart = apart;
+		/* Per-tile size times the number of tiles in the range */
+		pmem->size = pmem->mem.size *
+			     pmem->mem.range.size.col *
+			     pmem->mem.range.size.row;
+	}
+
+	return 0;
+}
+
/**
* aie_part_release() - release an AI engine partition instance
* @apart: AI engine partition device
@@ -29,6 +67,7 @@ void aie_part_release(struct aie_partition *apart)
aie_resource_uninitialize(&apart->cores_clk_state);
aie_resource_uninitialize(&apart->tiles_inuse);
list_del(&apart->node);
+ devm_kfree(aperture->dev, apart->pmems);
devm_kfree(aperture->dev, apart);
mutex_unlock(&aperture->mlock);
}
@@ -64,6 +103,12 @@ struct aie_partition *aie_part_create(struct aie_aperture *aperture,
apart->range.start.row = aperture->range.start.row;
apart->range.size.row = aperture->range.size.row;
+ ret = aie_part_create_mems_info(apart);
+ if (ret) {
+ dev_err(aperture->dev, "failed to create tile memory information.");
+ return ERR_PTR(ret);
+ }
+
/* SHIM row always enabled so it is not needed in the bitmap */
num_tiles = apart->range.size.col * (apart->range.size.row - 1);
ret = aie_resource_initialize(&apart->cores_clk_state, num_tiles);
--
2.34.1