[RFC 2/3] powerpc/numa: Define mapping between HW and kernel cpus

From: Michael Bringmann
Date: Tue Dec 11 2018 - 17:04:07 EST


Define an interface to map external powerpc cpus across multiple nodes
to a range of kernel cpu values. The mapping is intended to prevent
confusion within the kernel about the cpu+node assignment when the
configuration changes due to powerpc LPAR migration or other
associativity changes during the lifetime of a system. These
interfaces are used entirely within the powerpc kernel code to
maintain separation between the machine and kernel contexts.

Signed-off-by: Michael Bringmann <mwb@xxxxxxxxxxxxxxxxxx>
---
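A minimal usage sketch (illustration only, not part of the patch): a
hotplug add path might look up or create a mapping, then reserve the
resulting kernel cpu id. The variables dt_thread, core_ndx, and nid
are hypothetical stand-ins for values read from the device tree.

	int cpu = cpuremap_thread_to_cpu(dt_thread);

	if (cpu == CPUREMAP_NO_CPU)	/* no mapping recorded yet */
		cpu = cpuremap_map_cpu(dt_thread, core_ndx, nid);
	if (cpu != CPUREMAP_NO_CPU)
		cpu = cpuremap_reserve_cpu(cpu);	/* mark the id in use */

	/* ... bring 'cpu' online; on hot-remove: */
	cpuremap_release_cpu(cpu);
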
arch/powerpc/include/asm/topology.h | 32 +++++++
arch/powerpc/platforms/pseries/Kconfig | 9 ++
arch/powerpc/platforms/pseries/Makefile | 1
arch/powerpc/platforms/pseries/cpuremap.c | 135 ++++++++++++++++++++++++++++
4 files changed, 177 insertions(+)
create mode 100644 arch/powerpc/platforms/pseries/cpuremap.c

diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 4621f40..db11969 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -135,5 +135,37 @@ static inline void shared_proc_topology_init(void) {}
#endif
#endif

+/* Sentinels returned when no mapping exists or a request fails */
+#define CPUREMAP_NO_CPU (~0)
+#define CPUREMAP_NO_THREAD (~0)
+
+#ifdef CONFIG_CPUREMAP
+extern int cpuremap_thread_to_cpu(int thread_index);
+ /* Returns CPUREMAP_NO_CPU if no mapping is found */
+extern int cpuremap_map_cpu(int thread_index, int in_core_ndx, int node);
+ /* Returns CPUREMAP_NO_CPU on failure */
+extern int cpuremap_reserve_cpu(int cpu);
+ /* Returns CPUREMAP_NO_CPU on failure */
+extern int cpuremap_release_cpu(int cpu);
+ /* Returns CPUREMAP_NO_CPU on failure */
+extern int cpuremap_cpu_to_thread(int cpu);
+ /* Returns CPUREMAP_NO_THREAD if no mapping is found */
+extern void cpuremap_init(void);
+ /* Identify necessary constants & alloc memory at boot */
+#else
+static inline int cpuremap_thread_to_cpu(int thread_index)
+{
+ return thread_index;
+}
+static inline int cpuremap_map_cpu(int thread_index, int in_core_ndx, int node)
+{
+ return thread_index;
+}
+static inline int cpuremap_reserve_cpu(int cpu) { return cpu; }
+static inline int cpuremap_release_cpu(int cpu) { return cpu; }
+static inline int cpuremap_cpu_to_thread(int cpu) { return cpu; }
+static inline void cpuremap_init(void) {}
+#endif
+
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_TOPOLOGY_H */
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 2e4bd32..c35009f 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -145,3 +145,12 @@ config PAPR_SCM
tristate "Support for the PAPR Storage Class Memory interface"
help
Enable access to hypervisor provided storage class memory.
+
+config CPUREMAP
+ bool "Support for mapping hw cpu+node to kernel index"
+ depends on SMP && (PPC_PSERIES)
+ ---help---
+ Say Y here to enable remapping of hw cpu+node values to
+ stable kernel cpu ids at runtime on pSeries machines.
+
+ Say N if you are unsure.
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index a43ec84..ad49d8e 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_KEXEC_CORE) += kexec.o
obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o

obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o
+obj-$(CONFIG_CPUREMAP) += cpuremap.o
obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o pmem.o

obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
diff --git a/arch/powerpc/platforms/pseries/cpuremap.c b/arch/powerpc/platforms/pseries/cpuremap.c
new file mode 100644
index 0000000..86fdf12
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/cpuremap.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/string.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <asm/prom.h>
+#include <asm/topology.h>
+
+struct cpuremap_cpu {
+ int thread_index;
+ /* Set from the ibm,ppc-interrupt-server#s array entries.
+ * Not cleared on release, so the old mapping can be found again.
+ */
+ int node;
+ bool in_use;
+ /* Set to true by cpuremap_reserve_cpu(),
+ * cleared by cpuremap_release_cpu().
+ */
+};
+
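+/* Single global remap table: threads[] has one entry per possible
+ * kernel cpu id, treated as per-node blocks of
+ * num_cores * num_threads_per_core slots.
+ */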
+struct cpuremap_struct {
+ int num_nodes;
+ int num_cores;
+ int num_threads_per_core;
+ struct cpuremap_cpu *threads;
+} cpuremap_data;
+
+
+void cpuremap_init(void)
+{
+ int i, k;
+
+ /* Identify necessary constants & alloc memory at boot.
+ * The core/thread geometry is a hard-coded placeholder here
+ * and should eventually come from the device tree.
+ */
+ cpuremap_data.num_threads_per_core = 8;
+ cpuremap_data.num_cores = 32;
+ cpuremap_data.num_nodes =
+ nr_cpu_ids /
+ (cpuremap_data.num_threads_per_core * cpuremap_data.num_cores);
+ cpuremap_data.threads = kcalloc(nr_cpu_ids, sizeof(struct cpuremap_cpu), GFP_KERNEL);
+ if (!cpuremap_data.threads)
+ return;
+
+ k = cpuremap_data.num_nodes *
+ cpuremap_data.num_threads_per_core *
+ cpuremap_data.num_cores;
+ for (i = 0; i < k; i++)
+ cpuremap_data.threads[i].thread_index = CPUREMAP_NO_THREAD;
+}
+
+int cpuremap_thread_to_cpu(int thread_index)
+{
+ int i, k;
+
+ /* Search from the thread's preferred slot, wrapping around;
+ * return CPUREMAP_NO_CPU if no in-use entry matches.
+ */
+ for (i = thread_index % nr_cpu_ids, k = 0; k < nr_cpu_ids; k++) {
+ if (cpuremap_data.threads[i].in_use &&
+ (cpuremap_data.threads[i].thread_index == thread_index))
+ return i;
+ if (++i >= nr_cpu_ids)
+ i = 0;
+ }
+ return CPUREMAP_NO_CPU;
+}
+
+int cpuremap_cpu_to_thread(int cpu)
+{
+ /* Return NO_THREAD if not found */
+ if (cpuremap_data.threads[cpu].in_use)
+ return cpuremap_data.threads[cpu].thread_index;
+ return CPUREMAP_NO_THREAD;
+}
+
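+/* Worked example of the slot arithmetic below, using the placeholder
+ * geometry from cpuremap_init() (8 threads * 32 cores = 256 ids per
+ * node): thread 5 of a core on node 1 is first tried at slot
+ * 1 * 256 + 5 = 261, and the search then wraps through the table.
+ */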
+int cpuremap_map_cpu(int thread_index, int in_core_ndx, int node)
+{
+ int first_thread, i, k;
+
+ /* Return CPUREMAP_NO_CPU on failure */
+ first_thread = (node *
+ (cpuremap_data.num_threads_per_core *
+ cpuremap_data.num_cores)) + in_core_ndx;
+
+ /* Alternative 0: Compressed map of cpus+nodes+threads
+ * assuming that no system will be fully built out.
+ * Alternative 1: Fully compact. Allocate new cpu ids
+ * as needed. No 'pretty' separation between nodes.
+ * Alternative 2: Also map incoming nodes from pHyp
+ * to virtual nodes for purposes of new cpu ids.
+ */
+
+ if (first_thread >= nr_cpu_ids)
+ first_thread = in_core_ndx; /* fall back to node 0's block */
+ for (i = first_thread, k = 0; k < nr_cpu_ids; k++) {
+ if (!cpuremap_data.threads[i].in_use ||
+ (cpuremap_data.threads[i].thread_index == thread_index)) {
+ cpuremap_data.threads[i].thread_index = thread_index;
+ cpuremap_data.threads[i].node = node;
+ return i;
+ }
+ if (++i >= nr_cpu_ids)
+ i = 0;
+ }
+ return CPUREMAP_NO_CPU;
+}
+
+int cpuremap_reserve_cpu(int cpu)
+{
+ if (!cpuremap_data.threads[cpu].in_use) {
+ cpuremap_data.threads[cpu].in_use = true;
+ return cpu;
+ }
+ return CPUREMAP_NO_CPU;
+}
+
+int cpuremap_release_cpu(int cpu)
+{
+ if (cpuremap_data.threads[cpu].in_use) {
+ cpuremap_data.threads[cpu].in_use = false;
+ return cpu;
+ }
+ return CPUREMAP_NO_CPU;
+}