[PATCH 3/3] mm: convert mm->cpu_vm_mask into cpumask_var_t

From: KOSAKI Motohiro
Date: Mon Apr 18 2011 - 08:18:57 EST


cpumask_t is a very big struct and cpu_vm_mask is placed in the wrong
position. This might reduce the cache hit ratio.

This patch makes two changes.
1) Move the cpumask to the end of mm_struct, because usually only the front
bits of the cpumask are accessed when the system has cpu-hotplug capability.
2) Convert cpu_vm_mask into cpumask_var_t. It may help to reduce the memory
footprint if cpumask_size() uses nr_cpumask_bits properly in the future.

In addition, this patch renames cpu_vm_mask to cpu_vm_mask_var.
This may help to detect out-of-tree users of cpu_vm_mask.

This patch has no functional change.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx>
---
Documentation/cachetlb.txt | 2 +-
include/linux/mm_types.h | 9 ++++++---
include/linux/sched.h | 1 +
init/main.c | 2 ++
kernel/fork.c | 37 ++++++++++++++++++++++++++++++++++---
mm/init-mm.c | 1 -
6 files changed, 44 insertions(+), 8 deletions(-)

This patch doesn't touch x86/kernel/tboot.c because it can't be compiled.

diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt
index 9164ae3..9b728dc 100644
--- a/Documentation/cachetlb.txt
+++ b/Documentation/cachetlb.txt
@@ -16,7 +16,7 @@ on all processors in the system. Don't let this scare you into
thinking SMP cache/tlb flushing must be so inefficient, this is in
fact an area where many optimizations are possible. For example,
if it can be proven that a user address space has never executed
-on a cpu (see vma->cpu_vm_mask), one need not perform a flush
+on a cpu (see mm_cpumask()), one need not perform a flush
for this address space on that cpu.

First, the TLB flushing interfaces, since they are the simplest. The
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ca01ab2..070c7f2 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -267,8 +267,6 @@ struct mm_struct {

struct linux_binfmt *binfmt;

- cpumask_t cpu_vm_mask;
-
/* Architecture-specific MM context */
mm_context_t context;

@@ -318,9 +316,14 @@ struct mm_struct {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
pgtable_t pmd_huge_pte; /* protected by page_table_lock */
#endif
+
+ cpumask_var_t cpu_vm_mask_var;
};

/* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
-#define mm_cpumask(mm) (&(mm)->cpu_vm_mask)
+static inline cpumask_t* mm_cpumask(struct mm_struct *mm)
+{
+ return mm->cpu_vm_mask_var;
+}

#endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3f7d3f9..7068380 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2170,6 +2170,7 @@ static inline void mmdrop(struct mm_struct * mm)
if (unlikely(atomic_dec_and_test(&mm->mm_count)))
__mmdrop(mm);
}
+extern int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm);

/* mmput gets rid of the mappings and all user-space */
extern void mmput(struct mm_struct *);
diff --git a/init/main.c b/init/main.c
index 4a9479e..8451425 100644
--- a/init/main.c
+++ b/init/main.c
@@ -509,6 +509,8 @@ asmlinkage void __init start_kernel(void)
sort_main_extable();
trap_init();
mm_init();
+ BUG_ON(mm_init_cpumask(&init_mm, 0));
+
/*
* Set up the scheduler prior starting any interrupts (such as the
* timer interrupt). Full topology setup happens at smp_init()
diff --git a/kernel/fork.c b/kernel/fork.c
index cc04197..5d303a2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -486,6 +486,20 @@ static void mm_init_aio(struct mm_struct *mm)
#endif
}

+int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+#ifdef CONFIG_CPUMASK_OFFSTACK
+ if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL))
+ return -ENOMEM;
+
+ if (oldmm)
+ cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm));
+ else
+ memset(mm_cpumask(mm), 0, cpumask_size());
+#endif
+ return 0;
+}
+
static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
{
atomic_set(&mm->mm_users, 1);
@@ -522,10 +536,20 @@ struct mm_struct * mm_alloc(void)
struct mm_struct * mm;

mm = allocate_mm();
- if (mm) {
- memset(mm, 0, sizeof(*mm));
- mm = mm_init(mm, current);
+ if (!mm)
+ return NULL;
+
+ memset(mm, 0, sizeof(*mm));
+ mm = mm_init(mm, current);
+ if (!mm)
+ return NULL;
+
+ if (mm_init_cpumask(mm, NULL)) {
+ mm_free_pgd(mm);
+ free_mm(mm);
+ return NULL;
}
+
return mm;
}

@@ -537,6 +561,7 @@ struct mm_struct * mm_alloc(void)
void __mmdrop(struct mm_struct *mm)
{
BUG_ON(mm == &init_mm);
+ free_cpumask_var(mm->cpu_vm_mask_var);
mm_free_pgd(mm);
destroy_context(mm);
mmu_notifier_mm_destroy(mm);
@@ -691,6 +716,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
if (!mm_init(mm, tsk))
goto fail_nomem;

+ if (mm_init_cpumask(mm, oldmm))
+ goto fail_nocpumask;
+
if (init_new_context(tsk, mm))
goto fail_nocontext;

@@ -717,6 +745,9 @@ fail_nomem:
return NULL;

fail_nocontext:
+ free_cpumask_var(mm->cpu_vm_mask_var);
+
+fail_nocpumask:
/*
* If init_new_context() failed, we cannot use mmput() to free the mm
* because it calls destroy_context()
diff --git a/mm/init-mm.c b/mm/init-mm.c
index 1d29cdf..4019979 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -21,6 +21,5 @@ struct mm_struct init_mm = {
.mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
- .cpu_vm_mask = CPU_MASK_ALL,
INIT_MM_CONTEXT(init_mm)
};
--
1.7.3.1



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/