Re: [PATCH 3/4] kernel/fork: switch vmapped stack callation to __vmalloc_area()

From: Konstantin Khlebnikov
Date: Tue Jan 23 2018 - 08:57:39 EST


# stress-ng --clone 100 -t 10s --metrics-brief
at 32-core machine shows boost 35000 -> 36000 bogo ops

Patch 4/4 is a kind of RFC.
Actually per-cpu cache of preallocated stacks works faster than buddy allocator thus
performance boots for it happens only at completely insane rate of clones.

On 23.01.2018 13:55, Konstantin Khlebnikov wrote:
This gives as pointer vm_struct without calling find_vm_area().

And fix comment about that task holds cache of vm area: this cache used
for retrieving actual stack pages, freeing is done by vfree_deferred().

Signed-off-by: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxxxxxx>
---
kernel/fork.c | 37 +++++++++++++++----------------------
1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 2295fc69717f..457c9151f3c8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -204,39 +204,32 @@ static int free_vm_stack_cache(unsigned int cpu)
static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
{
#ifdef CONFIG_VMAP_STACK
- void *stack;
+ struct vm_struct *stack;
int i;
for (i = 0; i < NR_CACHED_STACKS; i++) {
- struct vm_struct *s;
-
- s = this_cpu_xchg(cached_stacks[i], NULL);
-
- if (!s)
+ stack = this_cpu_xchg(cached_stacks[i], NULL);
+ if (!stack)
continue;
#ifdef CONFIG_DEBUG_KMEMLEAK
/* Clear stale pointers from reused stack. */
- memset(s->addr, 0, THREAD_SIZE);
+ memset(stack->addr, 0, THREAD_SIZE);
#endif
- tsk->stack_vm_area = s;
- return s->addr;
+ tsk->stack_vm_area = stack;
+ return stack->addr;
}
- stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
- VMALLOC_START, VMALLOC_END,
- THREADINFO_GFP,
- PAGE_KERNEL,
- 0, node, __builtin_return_address(0));
+ stack = __vmalloc_area(THREAD_SIZE, THREAD_ALIGN,
+ VMALLOC_START, VMALLOC_END,
+ THREADINFO_GFP, PAGE_KERNEL,
+ 0, node, __builtin_return_address(0));
+ if (unlikely(!stack))
+ return NULL;
- /*
- * We can't call find_vm_area() in interrupt context, and
- * free_thread_stack() can be called in interrupt context,
- * so cache the vm_struct.
- */
- if (stack)
- tsk->stack_vm_area = find_vm_area(stack);
- return stack;
+ /* Cache the vm_struct for stack to page conversions. */
+ tsk->stack_vm_area = stack;
+ return stack->addr;
#else
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
THREAD_SIZE_ORDER);