[PATCH 4/4] slub: deactivate freelist of kmem_cache_cpu all at once in deactivate_slab()

From: Joonsoo Kim
Date: Fri Jun 08 2012 - 13:25:17 EST


The current implementation of deactivate_slab() deactivates the
freelist of kmem_cache_cpu one object at a time, which is inefficient.
This patch changes it to deactivate the whole freelist at once.
There is no measurable overall performance benefit, however,
because deactivate_slab() is invoked infrequently.

Signed-off-by: Joonsoo Kim <js1304@xxxxxxxxx>
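
For illustration only (not part of the patch): a standalone,
userspace-style sketch of the "all at once" idea, using simplified
stand-in types rather than the slub structures. It walks the per-cpu
freelist once to find its tail and length, then splices the whole
chain onto the page freelist in a single step; the real patch does the
splice under __cmpxchg_double_slab() so it is atomic with respect to
remote frees, which this single-threaded example does not model.

#include <stdio.h>

struct object {
	struct object *next;		/* stands in for the stored free pointer */
};

struct fake_page {
	struct object *freelist;	/* objects freed back to the page */
	unsigned int inuse;		/* objects currently allocated */
};

/* Drain cpu_freelist into page->freelist with a single splice. */
static void drain_all_at_once(struct fake_page *page, struct object *cpu_freelist)
{
	struct object *lastfree = NULL;
	unsigned int nr_free = 0;
	struct object *p;

	/* One walk: count the objects and remember the tail. */
	for (p = cpu_freelist; p; p = p->next) {
		lastfree = p;
		nr_free++;
	}

	if (!nr_free)
		return;

	/*
	 * One splice instead of nr_free separate updates.  In the kernel
	 * this pair of updates is retried in a __cmpxchg_double_slab()
	 * loop; a plain update is enough for this illustration.
	 */
	lastfree->next = page->freelist;
	page->freelist = cpu_freelist;
	page->inuse -= nr_free;
}

int main(void)
{
	struct object objs[3] = {
		{ .next = &objs[1] }, { .next = &objs[2] }, { .next = NULL }
	};
	struct fake_page page = { .freelist = NULL, .inuse = 3 };

	drain_all_at_once(&page, &objs[0]);
	printf("inuse after drain: %u\n", page.inuse);	/* prints 0 */
	return 0;
}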

diff --git a/mm/slub.c b/mm/slub.c
index b5f2108..7bcb434 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1733,16 +1733,14 @@ void init_kmem_cache_cpus(struct kmem_cache *s)
*/
static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
- enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
struct page *page = c->page;
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
- int lock = 0;
- enum slab_modes l = M_NONE, m = M_NONE;
- void *freelist;
- void *nextfree;
- int tail = DEACTIVATE_TO_HEAD;
+ void *freelist, *lastfree = NULL;
+ unsigned int nr_free = 0;
struct page new;
- struct page old;
+ void *prior;
+ unsigned long counters;
+ int lock = 0, tail = DEACTIVATE_TO_HEAD;

if (page->freelist) {
stat(s, DEACTIVATE_REMOTE_FREES);
@@ -1752,127 +1750,54 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
c->tid = next_tid(c->tid);
c->page = NULL;
freelist = c->freelist;
- c->freelist = NULL;
-
- /*
- * Stage one: Free all available per cpu objects back
- * to the page freelist while it is still frozen. Leave the
- * last one.
- *
- * There is no need to take the list->lock because the page
- * is still frozen.
- */
- while (freelist && (nextfree = get_freepointer(s, freelist))) {
- void *prior;
- unsigned long counters;
-
- do {
- prior = page->freelist;
- counters = page->counters;
- set_freepointer(s, freelist, prior);
- new.counters = counters;
- new.inuse--;
- VM_BUG_ON(!new.frozen);
-
- } while (!__cmpxchg_double_slab(s, page,
- prior, counters,
- freelist, new.counters,
- "drain percpu freelist"));
-
- freelist = nextfree;
+ while (freelist) {
+ lastfree = freelist;
+ freelist = get_freepointer(s, freelist);
+ nr_free++;
}

- /*
- * Stage two: Ensure that the page is unfrozen while the
- * list presence reflects the actual number of objects
- * during unfreeze.
- *
- * We setup the list membership and then perform a cmpxchg
- * with the count. If there is a mismatch then the page
- * is not unfrozen but the page is on the wrong list.
- *
- * Then we restart the process which may have to remove
- * the page from the list that we just put it on again
- * because the number of objects in the slab may have
- * changed.
- */
-redo:
+ freelist = c->freelist;
+ c->freelist = NULL;

- old.freelist = page->freelist;
- old.counters = page->counters;
- VM_BUG_ON(!old.frozen);
+ do {
+ if (lock) {
+ lock = 0;
+ spin_unlock(&n->list_lock);
+ }

- /* Determine target state of the slab */
- new.counters = old.counters;
- if (freelist) {
- new.inuse--;
- set_freepointer(s, freelist, old.freelist);
- new.freelist = freelist;
- } else
- new.freelist = old.freelist;
+ prior = page->freelist;
+ counters = page->counters;

- new.frozen = 0;
+ if (lastfree)
+ set_freepointer(s, lastfree, prior);
+ else
+ freelist = prior;

- if (!new.inuse && n->nr_partial > s->min_partial)
- m = M_FREE;
- else if (new.freelist) {
- m = M_PARTIAL;
- if (!lock) {
- lock = 1;
- /*
- * Taking the spinlock removes the possiblity
- * that acquire_slab() will see a slab page that
- * is frozen
- */
- spin_lock(&n->list_lock);
- }
- } else {
- m = M_FULL;
- if (kmem_cache_debug(s) && !lock) {
+ new.counters = counters;
+ VM_BUG_ON(!new.frozen);
+ new.inuse -= nr_free;
+ new.frozen = 0;
+
+ if (new.inuse || n->nr_partial <= s->min_partial) {
lock = 1;
- /*
- * This also ensures that the scanning of full
- * slabs from diagnostic functions will not see
- * any frozen slabs.
- */
spin_lock(&n->list_lock);
}
- }
-
- if (l != m) {
-
- if (l == M_PARTIAL)
-
- remove_partial(n, page);
-
- else if (l == M_FULL)
-
- remove_full(s, page);
-
- if (m == M_PARTIAL) {

+ } while (!__cmpxchg_double_slab(s, page,
+ prior, counters,
+ freelist, new.counters,
+ "drain percpu freelist"));
+ if (lock) {
+ if (kmem_cache_debug(s) && !freelist) {
+ add_full(s, n, page);
+ stat(s, DEACTIVATE_FULL);
+ } else {
add_partial(n, page, tail);
stat(s, tail);
-
- } else if (m == M_FULL) {
-
- stat(s, DEACTIVATE_FULL);
- add_full(s, n, page);
-
}
- }
-
- l = m;
- if (!__cmpxchg_double_slab(s, page,
- old.freelist, old.counters,
- new.freelist, new.counters,
- "unfreezing slab"))
- goto redo;
-
- if (lock)
spin_unlock(&n->list_lock);
-
- if (m == M_FREE) {
+ } else {
+ VM_BUG_ON(new.inuse);
stat(s, DEACTIVATE_EMPTY);
discard_slab(s, page);
stat(s, FREE_SLAB);
--
1.7.9.5
