[PATCH] topology: make for_each_node_with_cpus() O(N)

From: Yury Norov
Date: Fri May 09 2025 - 12:24:51 EST


From: Yury Norov [NVIDIA] <yury.norov@xxxxxxxxx>

for_each_node_with_cpus() calls nr_cpus_node() at every iteration, which
makes it O(N^2). The kernel already tracks such nodes with the N_CPU entry
in the node_states array. Switching to it makes for_each_node_with_cpus() O(N).

Signed-off-by: Yury Norov [NVIDIA] <yury.norov@xxxxxxxxx>
---
include/linux/nodemask.h | 1 +
include/linux/topology.h | 5 +----
2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index f0ac0633366b..1e2bdda1a0a5 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -541,6 +541,7 @@ static __always_inline int node_random(const nodemask_t *maskp)

#define for_each_node(node) for_each_node_state(node, N_POSSIBLE)
#define for_each_online_node(node) for_each_node_state(node, N_ONLINE)
+#define for_each_node_with_cpus(node) for_each_node_state(node, N_CPU)

/*
* For nodemask scratch area.
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 24e715f0f6d2..ffee6b4a071a 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -29,6 +29,7 @@

#include <linux/arch_topology.h>
#include <linux/cpumask.h>
+#include <linux/nodemask.h>
#include <linux/bitops.h>
#include <linux/mmzone.h>
#include <linux/smp.h>
@@ -39,10 +40,6 @@
#define nr_cpus_node(node) cpumask_weight(cpumask_of_node(node))
#endif

-#define for_each_node_with_cpus(node) \
- for_each_online_node(node) \
- if (nr_cpus_node(node))
-
int arch_update_cpu_topology(void);

/* Conform to ACPI 2.0 SLIT distance definitions */
--
2.43.0