Re: [RT LATENCY] 249 microsecond latency caused by slub'sunfreeze_partials() code.

From: Christoph Lameter
Date: Tue May 28 2013 - 14:29:46 EST


I just ran some quick tests and the following seems to work.

Critical portions that need additional review (Joonsoo?) are the
modifications to get_partialinode() and __slab_free().


Subject: slub: Make cpu partial slab support configurable V2

cpu partial support can introduce level of indeterminism that is not wanted
in certain context (like a realtime kernel). Make it configurable.

Signed-off-by: Christoph Lameter <cl@xxxxxxxxx>

Index: linux/include/linux/slub_def.h
===================================================================
--- linux.orig/include/linux/slub_def.h 2013-05-20 15:21:26.925704505 -0500
+++ linux/include/linux/slub_def.h 2013-05-20 15:27:40.827613445 -0500
@@ -47,12 +47,24 @@ struct kmem_cache_cpu {
void **freelist; /* Pointer to next available object */
unsigned long tid; /* Globally unique transaction id */
struct page *page; /* The slab from which we are allocating */
+#ifdef CONFIG_SLUB_CPU_PARTIAL
struct page *partial; /* Partially allocated frozen slabs */
+#endif
#ifdef CONFIG_SLUB_STATS
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
};

+static inline struct page *kmem_cache_cpu_partial_pages
+ (struct kmem_cache_cpu *c)
+{
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+ return c->partial;
+#else
+ return NULL;
+#endif
+}
+
/*
* Word size structure that can be atomically updated or read and that
* contains both the order and the number of objects that a slab of the
@@ -73,7 +85,9 @@ struct kmem_cache {
int size; /* The size of an object including meta data */
int object_size; /* The size of an object without meta data */
int offset; /* Free pointer offset. */
+#ifdef CONFIG_SLUB_CPU_PARTIAL
int cpu_partial; /* Number of per cpu partial objects to keep around */
+#endif
struct kmem_cache_order_objects oo;

/* Allocation and freeing of slabs */
@@ -104,6 +118,15 @@ struct kmem_cache {
struct kmem_cache_node *node[MAX_NUMNODES];
};

+static inline int kmem_cache_cpu_partial(struct kmem_cache *s)
+{
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+ return s->cpu_partial;
+#else
+ return 0;
+#endif
+}
+
void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
void *__kmalloc(size_t size, gfp_t flags);

Index: linux/mm/slub.c
===================================================================
--- linux.orig/mm/slub.c 2013-05-20 15:21:26.925704505 -0500
+++ linux/mm/slub.c 2013-05-20 15:38:00.681437630 -0500
@@ -1530,7 +1530,9 @@ static inline void *acquire_slab(struct
return freelist;
}

+#ifdef CONFIG_SLUB_CPU_PARTIAL
static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
+#endif
static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);

/*
@@ -1570,10 +1572,15 @@ static void *get_partial_node(struct kme
stat(s, ALLOC_FROM_PARTIAL);
object = t;
} else {
+#ifdef CONFIG_SLUB_CPU_PARTIAL
put_cpu_partial(s, page, 0);
stat(s, CPU_PARTIAL_NODE);
+#else
+ BUG();
+#endif
}
- if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
+ if (kmem_cache_debug(s) ||
+ available > kmem_cache_cpu_partial(s) / 2)
break;

}
@@ -1874,6 +1881,7 @@ redo:
}
}

+#ifdef CONFIG_SLUB_CPU_PARTIAL
/*
* Unfreeze all the cpu partial slabs.
*
@@ -1988,6 +1996,7 @@ static void put_cpu_partial(struct kmem_

} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
}
+#endif

static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
@@ -2012,7 +2021,9 @@ static inline void __flush_cpu_slab(stru
if (c->page)
flush_slab(s, c);

+#ifdef CONFIG_SLUB_CPU_PARTIAL
unfreeze_partials(s, c);
+#endif
}
}

@@ -2028,7 +2039,7 @@ static bool has_cpu_slab(int cpu, void *
struct kmem_cache *s = info;
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);

- return c->page || c->partial;
+ return c->page || kmem_cache_cpu_partial_pages(c);
}

static void flush_all(struct kmem_cache *s)
@@ -2224,6 +2235,7 @@ static void *__slab_alloc(struct kmem_ca
page = c->page;
if (!page)
goto new_slab;
+
redo:

if (unlikely(!node_match(page, node))) {
@@ -2277,10 +2289,12 @@ load_freelist:

new_slab:

- if (c->partial) {
+ if (kmem_cache_cpu_partial_pages(c)) {
+#ifdef CONFIG_SLUB_CPU_PARTIAL
page = c->page = c->partial;
c->partial = page->next;
stat(s, CPU_PARTIAL_ALLOC);
+#endif
c->freelist = NULL;
goto redo;
}
@@ -2495,6 +2509,7 @@ static void __slab_free(struct kmem_cach
new.inuse--;
if ((!new.inuse || !prior) && !was_frozen) {

+#ifdef CONFIG_SLUB_CPU_PARTIAL
if (!kmem_cache_debug(s) && !prior)

/*
@@ -2503,7 +2518,9 @@ static void __slab_free(struct kmem_cach
*/
new.frozen = 1;

- else { /* Needs to be taken off a list */
+ else
+#endif
+ { /* Needs to be taken off a list */

n = get_node(s, page_to_nid(page));
/*
@@ -2525,6 +2542,7 @@ static void __slab_free(struct kmem_cach
"__slab_free"));

if (likely(!n)) {
+#ifdef CONFIG_SLUB_CPU_PARTIAL

/*
* If we just froze the page then put it onto the
@@ -2534,6 +2552,7 @@ static void __slab_free(struct kmem_cach
put_cpu_partial(s, page, 1);
stat(s, CPU_PARTIAL_FREE);
}
+#endif
/*
* The list lock was not taken therefore no list
* activity can be necessary.
@@ -3041,7 +3060,7 @@ static int kmem_cache_open(struct kmem_c
* list to avoid pounding the page allocator excessively.
*/
set_min_partial(s, ilog2(s->size) / 2);
-
+#ifdef CONFIG_SLUB_CPU_PARTIAL
/*
* cpu_partial determined the maximum number of objects kept in the
* per cpu partial lists of a processor.
@@ -3069,6 +3088,7 @@ static int kmem_cache_open(struct kmem_c
s->cpu_partial = 13;
else
s->cpu_partial = 30;
+#endif

#ifdef CONFIG_NUMA
s->remote_node_defrag_ratio = 1000;
@@ -4283,14 +4303,15 @@ static ssize_t show_slab_objects(struct
total += x;
nodes[node] += x;

+#ifdef CONFIG_SLUB_CPU_PARTIAL
page = ACCESS_ONCE(c->partial);
if (page) {
x = page->pobjects;
total += x;
nodes[node] += x;
}
-
per_cpu[node]++;
+#endif
}
}

@@ -4442,6 +4463,7 @@ static ssize_t min_partial_store(struct
}
SLAB_ATTR(min_partial);

+#ifdef CONFIG_SLUB_CPU_PARTIAL
static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
{
return sprintf(buf, "%u\n", s->cpu_partial);
@@ -4464,6 +4486,7 @@ static ssize_t cpu_partial_store(struct
return length;
}
SLAB_ATTR(cpu_partial);
+#endif

static ssize_t ctor_show(struct kmem_cache *s, char *buf)
{
@@ -4503,6 +4526,7 @@ static ssize_t objects_partial_show(stru
}
SLAB_ATTR_RO(objects_partial);

+#ifdef CONFIG_SLUB_CPU_PARTIAL
static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
{
int objects = 0;
@@ -4533,6 +4557,7 @@ static ssize_t slabs_cpu_partial_show(st
return len + sprintf(buf + len, "\n");
}
SLAB_ATTR_RO(slabs_cpu_partial);
+#endif

static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
{
@@ -4856,11 +4881,13 @@ STAT_ATTR(DEACTIVATE_BYPASS, deactivate_
STAT_ATTR(ORDER_FALLBACK, order_fallback);
STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
+#ifdef CONFIG_SLUB_CPU_PARTIAL
STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
#endif
+#endif

static struct attribute *slab_attrs[] = {
&slab_size_attr.attr,
@@ -4868,7 +4895,9 @@ static struct attribute *slab_attrs[] =
&objs_per_slab_attr.attr,
&order_attr.attr,
&min_partial_attr.attr,
+#ifdef CONFIG_SLUB_CPU_PARTIAL
&cpu_partial_attr.attr,
+#endif
&objects_attr.attr,
&objects_partial_attr.attr,
&partial_attr.attr,
@@ -4881,7 +4910,9 @@ static struct attribute *slab_attrs[] =
&destroy_by_rcu_attr.attr,
&shrink_attr.attr,
&reserved_attr.attr,
+#ifdef CONFIG_SLUB_CPU_PARTIAL
&slabs_cpu_partial_attr.attr,
+#endif
#ifdef CONFIG_SLUB_DEBUG
&total_objects_attr.attr,
&slabs_attr.attr,
@@ -4923,11 +4954,13 @@ static struct attribute *slab_attrs[] =
&order_fallback_attr.attr,
&cmpxchg_double_fail_attr.attr,
&cmpxchg_double_cpu_fail_attr.attr,
+#ifdef CONFIG_SLUB_CPU_PARTIAL
&cpu_partial_alloc_attr.attr,
&cpu_partial_free_attr.attr,
&cpu_partial_node_attr.attr,
&cpu_partial_drain_attr.attr,
#endif
+#endif
#ifdef CONFIG_FAILSLAB
&failslab_attr.attr,
#endif
Index: linux/init/Kconfig
===================================================================
--- linux.orig/init/Kconfig 2013-05-20 15:21:26.925704505 -0500
+++ linux/init/Kconfig 2013-05-20 15:21:26.921704441 -0500
@@ -1558,6 +1558,17 @@ config SLOB

endchoice

+config SLUB_CPU_PARTIAL
+ default y
+ depends on SLUB
+ bool "SLUB per cpu partial cache"
+ help
+ Per cpu partial caches accellerate objects allocation and freeing
+ that is local to a processor at the price of more indeterminism
+ in the latency of the free. On overflow these caches will be cleared
+ which requires the taking of locks that may cause latency spikes.
+ Typically one would choose no for a realtime system.
+
config MMAP_ALLOW_UNINITIALIZED
bool "Allow mmapped anonymous memory to be uninitialized"
depends on EXPERT && !MMU
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/