[RFC PATCH 1/2] mm/demotion: Expose memory type details via sysfs

From: Aneesh Kumar K.V
Date: Thu Aug 25 2022 - 05:24:22 EST


This patch adds /sys/devices/virtual/memtier/ where all memory tier related
details can be found. All allocated memory types will be listed there as
/sys/devices/virtual/memtier/memtypeN/

The nodes which are part of a specific memory type can be listed via
/sys/devices/virtual/memtier/memtypeN/nodes.

The adistance value of a specific memory type can be listed via
/sys/devices/virtual/memtier/memtypeN/adistance.

A directory listing looks like:
:/sys/devices/virtual/memtier# tree memtype1
memtype1
├── adistance
├── nodes
├── subsystem -> ../../../../bus/memtier
└── uevent

Since we will be using struct device to expose details via sysfs, drop struct
kref and use struct device for refcounting the memtype.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxx>
---
include/linux/memory-tiers.h | 3 +-
mm/memory-tiers.c | 97 +++++++++++++++++++++++++++++++++---
2 files changed, 91 insertions(+), 9 deletions(-)

diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index ecd865922707..487209a572b2 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -6,6 +6,7 @@
#include <linux/nodemask.h>
#include <linux/kref.h>
#include <linux/mmzone.h>
+#include <linux/device.h>
/*
* Each tier cover a abstrace distance chunk size of 128
*/
@@ -28,7 +29,7 @@ struct memory_dev_type {
int adistance;
/* Nodes of same abstract distance */
nodemask_t nodes;
- struct kref kref;
+ struct device dev;
};

#ifdef CONFIG_NUMA
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index ba844fe9cc8c..9eef3bd8d134 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -4,6 +4,7 @@
#include <linux/sysfs.h>
#include <linux/kobject.h>
#include <linux/memory.h>
+#include <linux/idr.h>
#include <linux/memory-tiers.h>

#include "internal.h"
@@ -31,6 +32,15 @@ static DEFINE_MUTEX(memory_tier_lock);
static LIST_HEAD(memory_tiers);
static struct memory_dev_type *node_memory_types[MAX_NUMNODES];
static struct memory_dev_type *default_dram_type;
+
+#define MAX_MEMORY_TYPE_ID 20
+static DEFINE_IDR(memory_type_idr);
+#define to_memory_type(device) container_of(device, struct memory_dev_type, dev)
+static struct bus_type memory_tier_subsys = {
+ .name = "memtier",
+ .dev_name = "memtier",
+};
+
#ifdef CONFIG_MIGRATION
static int top_tier_adistance;
/*
@@ -388,7 +398,7 @@ static inline void __init_node_memory_type(int node, struct memory_dev_type *mem
{
if (!node_memory_types[node]) {
node_memory_types[node] = memtype;
- kref_get(&memtype->kref);
+ get_device(&memtype->dev);
}
}

@@ -460,33 +470,87 @@ static bool clear_node_memory_tier(int node)
return cleared;
}

-static void release_memtype(struct kref *kref)
+static ssize_t nodes_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- struct memory_dev_type *memtype;
+ int ret;
+ struct memory_dev_type *memtype = to_memory_type(dev);

- memtype = container_of(kref, struct memory_dev_type, kref);
+ mutex_lock(&memory_tier_lock);
+ ret = sysfs_emit(buf, "%*pbl\n", nodemask_pr_args(&memtype->nodes));
+ mutex_unlock(&memory_tier_lock);
+ return ret;
+}
+static DEVICE_ATTR_RO(nodes);
+
+static ssize_t adistance_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int ret;
+ struct memory_dev_type *memtype = to_memory_type(dev);
+
+ mutex_lock(&memory_tier_lock);
+ ret = sysfs_emit(buf, "%d\n", memtype->adistance);
+ mutex_unlock(&memory_tier_lock);
+ return ret;
+}
+static DEVICE_ATTR_RO(adistance);
+
+static struct attribute *memtype_dev_attrs[] = {
+ &dev_attr_nodes.attr,
+ &dev_attr_adistance.attr,
+ NULL
+};
+
+static const struct attribute_group memtype_dev_group = {
+ .attrs = memtype_dev_attrs,
+};
+
+static const struct attribute_group *memtype_dev_groups[] = {
+ &memtype_dev_group,
+ NULL
+};
+
+static void memtype_device_release(struct device *dev)
+{
+ struct memory_dev_type *memtype = to_memory_type(dev);
+
+ idr_remove(&memory_type_idr, memtype->dev.id);
kfree(memtype);
}

struct memory_dev_type *alloc_memory_type(int adistance)
{
+ int ret;
struct memory_dev_type *memtype;

- memtype = kmalloc(sizeof(*memtype), GFP_KERNEL);
+ memtype = kzalloc(sizeof(*memtype), GFP_KERNEL);
if (!memtype)
return ERR_PTR(-ENOMEM);

memtype->adistance = adistance;
INIT_LIST_HEAD(&memtype->tier_sibiling);
memtype->nodes = NODE_MASK_NONE;
- kref_init(&memtype->kref);
+ memtype->dev.id = idr_alloc(&memory_type_idr, NULL,
+ 1, MAX_MEMORY_TYPE_ID + 1, GFP_KERNEL);
+ memtype->dev.bus = &memory_tier_subsys;
+ memtype->dev.release = memtype_device_release;
+ memtype->dev.groups = memtype_dev_groups;
+ dev_set_name(&memtype->dev, "%s%d", "memtype", memtype->dev.id);
+
+ ret = device_register(&memtype->dev);
+ if (ret) {
+ put_device(&memtype->dev);
+ return ERR_PTR(ret);
+ }
+
return memtype;
}
EXPORT_SYMBOL_GPL(alloc_memory_type);

void destroy_memory_type(struct memory_dev_type *memtype)
{
- kref_put(&memtype->kref, release_memtype);
+ device_unregister(&memtype->dev);
}
EXPORT_SYMBOL_GPL(destroy_memory_type);

@@ -504,7 +568,7 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype)
mutex_lock(&memory_tier_lock);
if (node_memory_types[node] == memtype) {
node_memory_types[node] = NULL;
- kref_put(&memtype->kref, release_memtype);
+ put_device(&memtype->dev);
}
mutex_unlock(&memory_tier_lock);
}
@@ -582,6 +646,23 @@ static int __init memory_tier_init(void)
}
subsys_initcall(memory_tier_init);

+/*
+ * initialize sysfs subsys in core_initcall so that
+ * other kernel components can do alloc_memory_type
+ * before memory_tier_init
+ */
+static int __init memory_tier_sysfs_init(void)
+{
+ int err;
+
+ err = subsys_virtual_register(&memory_tier_subsys, NULL);
+ if (err)
+ return err;
+
+ return 0;
+}
+core_initcall(memory_tier_sysfs_init);
+
bool numa_demotion_enabled = false;

#ifdef CONFIG_MIGRATION
--
2.37.2