[PATCH 2/4] memcg: reduce memory for the lruvec and memcg stats

From: Shakeel Butt
Date: Tue Apr 23 2024 - 01:19:34 EST


At the moment, the amount of memory allocated for stats related structs
in the mem_cgroup corresponds to the size of enum node_stat_item.
However, not all fields in enum node_stat_item have corresponding memcg
stats. The fields of enum node_stat_item are sorted in such a way that
all the fields with corresponding memcg stats are at the start of the
enum node_stat_item. So, let's just make an explicit boundary within
enum node_stat_item and use that boundary to allocate memory for stats
related structs of memcgs.

For a given x86_64 config, the size of stats with and without patch is:

struct (size in bytes)          w/o     with

struct lruvec_stats             1128     648
struct lruvec_stats_percpu       752     432
struct memcg_vmstats            1832    1352
struct memcg_vmstats_percpu     1280     960

The memory savings are further compounded by the fact that these structs
are allocated for each CPU and for each node. To be precise, for each memcg,
the memory saved would be:

Memory saved = ((21 * 3 * NR_NODES) + (21 * 2 * NR_NODES * NR_CPUS) +
                (21 * 3) + (21 * 2 * NR_CPUS)) * sizeof(long)

Where 21 is the number of fields eliminated.

Signed-off-by: Shakeel Butt <shakeel.butt@xxxxxxxxx>
---
include/linux/memcontrol.h | 12 ++++++------
include/linux/mmzone.h | 8 ++++++--
mm/memcontrol.c | 5 ++++-
3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9aba0d0462ca..d68db7a0e829 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -32,7 +32,7 @@ struct kmem_cache;

/* Cgroup-specific page state, on top of universal node page state */
enum memcg_stat_item {
- MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
+ MEMCG_SWAP = NR_VM_NODE_MEMCG_STAT_ITEMS,
MEMCG_SOCK,
MEMCG_PERCPU_B,
MEMCG_VMALLOC,
@@ -92,21 +92,21 @@ struct mem_cgroup_reclaim_iter {

struct lruvec_stats_percpu {
/* Local (CPU and cgroup) state */
- long state[NR_VM_NODE_STAT_ITEMS];
+ long state[NR_VM_NODE_MEMCG_STAT_ITEMS];

/* Delta calculation for lockless upward propagation */
- long state_prev[NR_VM_NODE_STAT_ITEMS];
+ long state_prev[NR_VM_NODE_MEMCG_STAT_ITEMS];
};

struct lruvec_stats {
/* Aggregated (CPU and subtree) state */
- long state[NR_VM_NODE_STAT_ITEMS];
+ long state[NR_VM_NODE_MEMCG_STAT_ITEMS];

/* Non-hierarchical (CPU aggregated) state */
- long state_local[NR_VM_NODE_STAT_ITEMS];
+ long state_local[NR_VM_NODE_MEMCG_STAT_ITEMS];

/* Pending child counts during tree propagation */
- long state_pending[NR_VM_NODE_STAT_ITEMS];
+ long state_pending[NR_VM_NODE_MEMCG_STAT_ITEMS];
};

/*
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 989ca97402c6..59592f3c7d9b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -192,8 +192,12 @@ enum node_stat_item {
NR_SHMEM_THPS,
NR_FILE_THPS,
NR_ANON_THPS,
- /* No memcg stats for the following fields. */
- NR_SHMEM_PMDMAPPED,
+ /*
+ * No memcg stats for the following fields. Please add stats which have
+ * memcg counterpart above NR_VM_NODE_MEMCG_STAT_ITEMS.
+ */
+ NR_VM_NODE_MEMCG_STAT_ITEMS,
+ NR_SHMEM_PMDMAPPED = NR_VM_NODE_MEMCG_STAT_ITEMS,
NR_FILE_PMDMAPPED,
NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
NR_VMSCAN_WRITE,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 833d09c1d523..bb1bbf417a46 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1648,6 +1648,9 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
{
int i;

+ /* Reduce by 1 for MEMCG_SWAP as that is not exposed in v2. */
+ BUILD_BUG_ON(ARRAY_SIZE(memory_stats) != MEMCG_NR_STAT - 1);
+
/*
* Provide statistics on the state of the memory subsystem as
* well as cumulative event counters that show past behavior.
@@ -5869,7 +5872,7 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)

lstatc = per_cpu_ptr(pn->lruvec_stats_percpu, cpu);

- for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
+ for (i = 0; i < NR_VM_NODE_MEMCG_STAT_ITEMS; i++) {
delta = pn->lruvec_stats.state_pending[i];
if (delta)
pn->lruvec_stats.state_pending[i] = 0;
--
2.43.0