[PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids

From: Rik van Riel
Date: Wed Jun 20 2018 - 15:57:12 EST


The mm_struct always contains a cpumask bitmap, regardless of
CONFIG_CPUMASK_OFFSTACK. That means the first step can be to
simplify things, and simply have one bitmask at the end of the
mm_struct for the mm_cpumask.

The second step is to determine the correct size for the
mm_struct slab object from the size of the mm_struct
(excluding the cpu bitmap) and the size the cpumask.

For init_mm we can simply allocate the maximum size this
kernel is compiled for, since we only have one init_mm
in the system, anyway.

Signed-off-by: Rik van Riel <riel@xxxxxxxxxxx>
Tested-by: Song Liu <songliubraving@xxxxxx>
---
include/linux/mm_types.h | 18 ++++++++----------
kernel/fork.c | 14 ++++++++------
mm/init-mm.c | 10 ++++++++++
3 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 21612347d311..8e91632958f3 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -427,8 +427,6 @@ struct mm_struct {

struct linux_binfmt *binfmt;

- cpumask_var_t cpu_vm_mask_var;
-
/* Architecture-specific MM context */
mm_context_t context;

@@ -465,9 +463,6 @@ struct mm_struct {
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
pgtable_t pmd_huge_pte; /* protected by page_table_lock */
#endif
-#ifdef CONFIG_CPUMASK_OFFSTACK
- struct cpumask cpumask_allocation;
-#endif
#ifdef CONFIG_NUMA_BALANCING
/*
* numa_next_scan is the next time that the PTEs will be marked
@@ -502,22 +497,25 @@ struct mm_struct {
/* HMM needs to track a few things per mm */
struct hmm *hmm;
#endif
+
+ /*
+ * The mm_cpumask needs to be at the end of mm_struct, because it
+ * is dynamically sized based on nr_cpu_ids.
+ */
+ unsigned long cpu_bitmap[];
} __randomize_layout;

extern struct mm_struct init_mm;

static inline void mm_init_cpumask(struct mm_struct *mm)
{
-#ifdef CONFIG_CPUMASK_OFFSTACK
- mm->cpu_vm_mask_var = &mm->cpumask_allocation;
-#endif
- cpumask_clear(mm->cpu_vm_mask_var);
+ cpumask_clear((struct cpumask *)&mm->cpu_bitmap);
}

/* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
{
- return mm->cpu_vm_mask_var;
+ return (struct cpumask *)&mm->cpu_bitmap;
}

struct mmu_gather;
diff --git a/kernel/fork.c b/kernel/fork.c
index a5d21c42acfc..c6a20bc78102 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2242,6 +2242,8 @@ static void sighand_ctor(void *data)

void __init proc_caches_init(void)
{
+ unsigned int mm_size;
+
sighand_cachep = kmem_cache_create("sighand_cache",
sizeof(struct sighand_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
@@ -2258,15 +2260,15 @@ void __init proc_caches_init(void)
sizeof(struct fs_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
NULL);
+
/*
- * FIXME! The "sizeof(struct mm_struct)" currently includes the
- * whole struct cpumask for the OFFSTACK case. We could change
- * this to *only* allocate as much of it as required by the
- * maximum number of CPU's we can ever have. The cpumask_allocation
- * is at the end of the structure, exactly for that reason.
+ * The mm_cpumask is located at the end of mm_struct, and is
+ * dynamically sized based on nr_cpu_ids.
*/
+ mm_size = sizeof(struct mm_struct) + cpumask_size();
+
mm_cachep = kmem_cache_create_usercopy("mm_struct",
- sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
+ mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
offsetof(struct mm_struct, saved_auxv),
sizeof_field(struct mm_struct, saved_auxv),
diff --git a/mm/init-mm.c b/mm/init-mm.c
index f94d5d15ebc0..20fe222fe4c0 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -15,6 +15,15 @@
#define INIT_MM_CONTEXT(name)
#endif

+/*
+ * For dynamically allocated mm_structs, there is a dynamically sized cpumask
+ * at the end of the structure, the size of which depends on nr_cpu_ids. That
+ * way we allocate only as much memory for mm_cpumask() as needed for the
+ * hundreds, or thousands of processes that a system typically runs.
+ *
+ * Since there is only one init_mm in the entire system, keep it simple
+ * and size this cpu_bitmask to NR_CPUS.
+ */
struct mm_struct init_mm = {
.mm_rb = RB_ROOT,
.pgd = swapper_pg_dir,
@@ -24,5 +33,6 @@ struct mm_struct init_mm = {
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
.user_ns = &init_user_ns,
+ .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0},
INIT_MM_CONTEXT(init_mm)
};
--
2.14.4