[PATCH] per thread vma cache.

From: KAMEZAWA Hiroyuki
Date: Mon Dec 12 2011 - 04:31:19 EST


This is a toy patch. What do you think?

This is a patch for per-thread mmap_cache without heavy atomic ops.

I'm sure the overhead of find_vma() is pretty small in a usual application
and this will not show a good improvement. But I think that if we need
to have a cache of vma, it should be per thread rather than per mm.

This patch adds thread->mmap_cache, a pointer to a vm_area_struct,
and updates it appropriately. Because we have no refcnt on vm_area_struct,
thread->mmap_cache may be a stale pointer. This patch detects a stale
pointer by checking that:

- thread->mmap_cache is one of SLABs in vm_area_cachep.
- thread->mmap_cache->vm_mm == mm.

vma->vm_mm will be cleared before kmem_cache_free() by this patch.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>

Note:
Kosaki's work will merge find_vma_prev() and find_vma().
Then, we'll cover most cases just by modifying find_vma().
---
include/linux/mm_types.h | 2 +
include/linux/sched.h | 1 +
include/linux/slab_def.h | 13 ++++++++++
include/linux/slub_def.h | 12 +++++++++
init/Kconfig | 5 ++++
kernel/fork.c | 3 +-
mm/mmap.c | 61 +++++++++++++++++++++++++++++++++++++++-------
mm/nommu.c | 4 +-
8 files changed, 89 insertions(+), 12 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 81a56df..8a9be1a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -255,6 +255,8 @@ struct vm_area_struct {
#endif
};

+extern void free_vma(struct vm_area_struct *vma);
+
struct core_thread {
struct task_struct *task;
struct core_thread *next;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index cbb5d3e..a161c2b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1294,6 +1294,7 @@ struct task_struct {
#endif

struct mm_struct *mm, *active_mm;
+ struct vm_area_struct *mmap_cache;
#ifdef CONFIG_COMPAT_BRK
unsigned brk_randomized:1;
#endif
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index d00e0ba..763c1d9 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -214,4 +214,17 @@ found:

#endif /* CONFIG_NUMA */

+/*
+ * Check the object is under specified kmem_cache.
+ */
+static inline bool is_kmem_cache(void *data, struct kmem_cache *s)
+{
+ struct page *page;
+
+ page = virt_to_head_page(data);
+ if (PageSlab(page) && page->lru.prev == s)
+ return true;
+ return false;
+}
+
#endif /* _LINUX_SLAB_DEF_H */
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index a32bcfd..9eba7e7 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,6 +10,7 @@
#include <linux/gfp.h>
#include <linux/workqueue.h>
#include <linux/kobject.h>
+#include <linux/mm.h>

#include <linux/kmemleak.h>

@@ -313,4 +314,15 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
}
#endif

+/*
+ * Check the object is under specified kmem cache.
+ */
+static inline bool is_kmem_cache(void *data, struct kmem_cache *s)
+{
+ struct page *page = virt_to_head_page(data);
+
+ if (PageSlab(page) && page->slab == s)
+ return true;
+ return false;
+}
#endif /* _LINUX_SLUB_DEF_H */
diff --git a/init/Kconfig b/init/Kconfig
index 6dfc8c3..7fcfffd 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1266,6 +1266,11 @@ config SLOB

endchoice

+config PER_THREAD_MMAP_CACHE
+ bool
+ default y
+ depends on SLAB || SLUB
+
config MMAP_ALLOW_UNINITIALIZED
bool "Allow mmapped anonymous memory to be uninitialized"
depends on EXPERT && !MMU
diff --git a/kernel/fork.c b/kernel/fork.c
index e20518d..18d73c2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -432,7 +432,7 @@ out:
fail_nomem_anon_vma_fork:
mpol_put(pol);
fail_nomem_policy:
- kmem_cache_free(vm_area_cachep, tmp);
+ free_vma(tmp);
fail_nomem:
retval = -ENOMEM;
vm_unacct_memory(charge);
@@ -825,6 +825,7 @@ good_mm:

tsk->mm = mm;
tsk->active_mm = mm;
+ tsk->mmap_cache = NULL;
return 0;

fail_nomem:
diff --git a/mm/mmap.c b/mm/mmap.c
index 83813fa..7b86e05 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -238,7 +238,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
removed_exe_file_vma(vma->vm_mm);
}
mpol_put(vma_policy(vma));
- kmem_cache_free(vm_area_cachep, vma);
+ free_vma(vma);
return next;
}

@@ -478,8 +478,11 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
if (next)
next->vm_prev = prev;
rb_erase(&vma->vm_rb, &mm->mm_rb);
- if (mm->mmap_cache == vma)
+ if (mm->mmap_cache == vma) {
mm->mmap_cache = prev;
+ if (current->mm == mm)
+ current->mmap_cache = prev;
+ }
}

/*
@@ -642,7 +645,7 @@ again: remove_next = 1 + (end > next->vm_end);
anon_vma_merge(vma, next);
mm->map_count--;
mpol_put(vma_policy(next));
- kmem_cache_free(vm_area_cachep, next);
+ free_vma(next);
/*
* In mprotect's case 6 (see comments on vma_merge),
* we must remove another next too. It would clutter
@@ -1364,7 +1367,7 @@ unmap_and_free_vma:
unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
charged = 0;
free_vma:
- kmem_cache_free(vm_area_cachep, vma);
+ free_vma(vma);
unacct_error:
if (charged)
vm_unacct_memory(charged);
@@ -1588,10 +1591,42 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,

EXPORT_SYMBOL(get_unmapped_area);

+#ifdef CONFIG_PER_THREAD_MMAP_CACHE
+static struct vm_area_struct *thread_mmap_cache(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma = current->mmap_cache;
+
+ if (!vma || current->mm != mm)
+ return NULL;
+
+ if ((vma->vm_mm != mm) || !is_kmem_cache(vma, vm_area_cachep))
+ return NULL;
+
+ return vma;
+}
+
+static void set_thread_mmap_cache(struct mm_struct *mm,
+ struct vm_area_struct *vma)
+{
+ if (current->mm == mm)
+ current->mmap_cache = vma;
+}
+#else
+static struct vm_area_struct *thread_mmap_cache(struct mm_struct *mm)
+{
+ return NULL;
+}
+
+static void set_thread_mmap_cache(struct mm_struct *mm,
+ struct vm_area_struct *vma)
+{
+}
+#endif
+
/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
- struct vm_area_struct *vma = NULL;
+ struct vm_area_struct *vma = thread_mmap_cache(mm);

if (mm) {
/* Check the cache first. */
@@ -1617,8 +1652,10 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
} else
rb_node = rb_node->rb_right;
}
- if (vma)
+ if (vma) {
mm->mmap_cache = vma;
+ set_thread_mmap_cache(mm, vma);
+ }
}
}
return vma;
@@ -2017,7 +2054,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
out_free_mpol:
mpol_put(pol);
out_free_vma:
- kmem_cache_free(vm_area_cachep, new);
+ free_vma(new);
out_err:
return err;
}
@@ -2400,7 +2437,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
out_free_mempol:
mpol_put(pol);
out_free_vma:
- kmem_cache_free(vm_area_cachep, new_vma);
+ free_vma(new_vma);
return NULL;
}

@@ -2506,7 +2543,7 @@ int install_special_mapping(struct mm_struct *mm,
return 0;

out:
- kmem_cache_free(vm_area_cachep, vma);
+ free_vma(vma);
return ret;
}

@@ -2675,6 +2712,12 @@ void mm_drop_all_locks(struct mm_struct *mm)
mutex_unlock(&mm_all_locks_mutex);
}

+void free_vma(struct vm_area_struct *vma)
+{
+ vma->vm_mm = NULL;
+ kmem_cache_free(vm_area_cachep, vma);
+}
+
/*
* initialise the VMA slab
*/
diff --git a/mm/nommu.c b/mm/nommu.c
index b982290..3c98fd5 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -793,7 +793,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
removed_exe_file_vma(mm);
}
put_nommu_region(vma->vm_region);
- kmem_cache_free(vm_area_cachep, vma);
+ free_vma(vma);
}

/*
@@ -1443,7 +1443,7 @@ error:
fput(vma->vm_file);
if (vma->vm_flags & VM_EXECUTABLE)
removed_exe_file_vma(vma->vm_mm);
- kmem_cache_free(vm_area_cachep, vma);
+ free_vma(vma);
kleave(" = %d", ret);
return ret;

--
1.7.4.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/