Re: [PATCH 5/5] tracing/ftrace: Introduce the big kernel lock tracer

From: Frédéric Weisbecker
Date: Tue Oct 21 2008 - 08:29:09 EST


2008/10/11 Frederic Weisbecker <fweisbec@xxxxxxxxx>:
> Introduce the Big Kernel Lock tracer.
> This new tracer lets one to observe the latencies caused
> by the lock_kernel() function.
> It captures the time when the task request the spinlock,
> the time when the spinlock is hold and the time it is released.
> This way we can measure the latency for a task that wait for this
> spinlock and the latency caused by the time this lock is hold.
>
> I will surely add some improvements like:
>
> _ Adding a stack trace to know which function locked or unlocked the kernel.
> _ Adding a script to get some statistics about this: which functions hold it
> often, which hold it for a long time and some other things....
>
> Signed-off-by: Frederic Weisbecker <fweisbec@xxxxxxxxx>
> Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
> ---
> include/trace/bkl.h | 29 ++++++++
> kernel/trace/Kconfig | 11 +++
> kernel/trace/Makefile | 1 +
> kernel/trace/trace.h | 18 +++++
> kernel/trace/trace_bkl.c | 170 ++++++++++++++++++++++++++++++++++++++++++++++
> lib/kernel_lock.c | 22 ++++++-
> 6 files changed, 249 insertions(+), 2 deletions(-)
>
> diff --git a/include/trace/bkl.h b/include/trace/bkl.h
> new file mode 100644
> index 0000000..bed2f1b
> --- /dev/null
> +++ b/include/trace/bkl.h
> @@ -0,0 +1,29 @@
> +#ifndef _TRACE_BKL_H
> +#define _TRACE_BKL_H
> +
> +
> +/* Entry which log the time when the task tried first to acquire the bkl
> + * and the time when it acquired it.
> + * That will let us know the latency to acquire the kernel lock.
> + */
> +struct bkl_trace_acquire {
> + unsigned long long acquire_req_time;
> + unsigned long long acquire_time;
> +};
> +
> +/* This will log the time when the task released the bkl.
> + * So we will know the whole duration of its kernel lock.
> + */
> +struct bkl_trace_release {
> + unsigned long long release_time;
> +};
> +
> +#ifdef CONFIG_BKL_TRACER
> +extern void trace_bkl_acquire(struct bkl_trace_acquire *trace);
> +extern void trace_bkl_release(struct bkl_trace_release *trace);
> +#else
> +static void trace_bkl_acquire(struct bkl_trace_acquire *trace) { }
> +static void trace_bkl_release(struct bkl_trace_release *trace) { }
> +#endif
> +
> +#endif
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index 396aea1..fde7700 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -144,6 +144,17 @@ config STACK_TRACER
> This tracer records the max stack of the kernel, and displays
> it in debugfs/tracing/stack_trace
>
> +config BKL_TRACER
> + bool "Trace Big Kernel Lock latencies"
> + depends on HAVE_FTRACE
> + depends on DEBUG_KERNEL
> + select FTRACE
> + select STACKTRACE
> + help
> + This tracer records the latencies issued by the Big Kernel Lock.
> + It traces the time when a task request the bkl, the time when it
> + acquires it and the time when it releases it.
> +
> config DYNAMIC_FTRACE
> bool "enable/disable ftrace tracepoints dynamically"
> depends on FTRACE
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index a85dfba..2ba8904 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -24,5 +24,6 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o
> obj-$(CONFIG_STACK_TRACER) += trace_stack.o
> obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
> obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
> +obj-$(CONFIG_BKL_TRACER) += trace_bkl.o
>
> libftrace-y := ftrace.o
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 707620c..b1df671 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -8,6 +8,7 @@
> #include <linux/ring_buffer.h>
> #include <linux/mmiotrace.h>
> #include <linux/ftrace.h>
> +#include <trace/bkl.h>
>
> enum trace_type {
> __TRACE_FIRST_TYPE = 0,
> @@ -22,6 +23,8 @@ enum trace_type {
> TRACE_MMIO_RW,
> TRACE_MMIO_MAP,
> TRACE_BOOT,
> + TRACE_BKL_ACQ,
> + TRACE_BKL_REL,
>
> __TRACE_LAST_TYPE
> };
> @@ -119,6 +122,17 @@ struct trace_boot {
> struct boot_trace initcall;
> };
>
> +/* Trace bkl entries */
> +struct trace_bkl_acq {
> + struct trace_entry ent;
> + struct bkl_trace_acquire bkl_acq;
> +};
> +
> +struct trace_bkl_rel {
> + struct trace_entry ent;
> + struct bkl_trace_release bkl_rel;
> +};
> +
> /*
> * trace_flag_type is an enumeration that holds different
> * states when a trace occurs. These are:
> @@ -219,6 +233,10 @@ extern void __ftrace_bad_type(void);
> IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
> TRACE_MMIO_MAP); \
> IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \
> + IF_ASSIGN(var, ent, struct trace_bkl_acq, \
> + TRACE_BKL_ACQ); \
> + IF_ASSIGN(var, ent, struct trace_bkl_rel, \
> + TRACE_BKL_REL); \
> __ftrace_bad_type(); \
> } while (0)
>
> diff --git a/kernel/trace/trace_bkl.c b/kernel/trace/trace_bkl.c
> new file mode 100644
> index 0000000..169ee01
> --- /dev/null
> +++ b/kernel/trace/trace_bkl.c
> @@ -0,0 +1,170 @@
> +/*
> + * Ring buffer based Big Kernel Lock tracer
> + * for purpose of measuring the latencies issued
> + * by the BKL.
> + *
> + * Copyright (C) 2008 Frederic Weisbecker <fweisbec@xxxxxxxxx>
> + */
> +
> +#include <trace/bkl.h>
> +#include "trace.h"
> +#include <linux/ftrace.h>
> +
> +
> +static struct trace_array *bkl_trace;
> +static bool bkl_tracer_enabled;
> +
> +
> +static void bkl_trace_init(struct trace_array *tr)
> +{
> + int cpu;
> + bkl_trace = tr;
> +
> + for_each_cpu_mask(cpu, cpu_possible_map)
> + tracing_reset(tr, cpu);
> +
> + if (tr->ctrl)
> + bkl_tracer_enabled = 1;
> +}
> +
> +static void reset_bkl_trace(struct trace_array *tr)
> +{
> + bkl_tracer_enabled = 0;
> +}
> +
> +static void bkl_trace_ctrl_update(struct trace_array *tr)
> +{
> + if (tr->ctrl)
> + bkl_tracer_enabled = 1;
> + else
> + bkl_tracer_enabled = 0;
> +}
> +
> +static enum print_line_t bkl_acq_print_line(struct trace_iterator *iter)
> +{
> + int ret;
> + struct trace_entry *entry = iter->ent;
> + struct trace_bkl_acq *field = (struct trace_bkl_acq *) entry;
> + struct bkl_trace_acquire *bta = &field->bkl_acq;
> + struct trace_seq *s = &iter->seq;
> + unsigned long sec, nsec_rem;
> + unsigned long long wait_for_spin;
> +
> + wait_for_spin = bta->acquire_time - bta->acquire_req_time;
> + nsec_rem = do_div(bta->acquire_req_time, 1000000000);
> + sec = (unsigned long) bta->acquire_req_time;
> + ret = trace_seq_printf(s, "Kernel lock requested at %5lu.%09lu ",
> + sec, nsec_rem);
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + nsec_rem = do_div(bta->acquire_time, 1000000000);
> + sec = (unsigned long) bta->acquire_time;
> + ret = trace_seq_printf(s, "acquired at %5lu.%09lu "
> + "(waited %llu nsecs)\n",
> + sec, nsec_rem,
> + wait_for_spin);
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> + return TRACE_TYPE_HANDLED;
> +}
> +
> +static enum print_line_t bkl_rel_print_line(struct trace_iterator *iter)
> +{
> + int ret;
> + struct trace_entry *entry = iter->ent;
> + struct trace_bkl_rel *field = (struct trace_bkl_rel *) entry;
> + struct bkl_trace_release *btr = &field->bkl_rel;
> + struct trace_seq *s = &iter->seq;
> + unsigned long sec, nsec_rem;
> +
> + nsec_rem = do_div(btr->release_time, 1000000000);
> + sec = (unsigned long) btr->release_time;
> + ret = trace_seq_printf(s, "Kernel lock released at %5lu.%09lu\n",
> + sec, nsec_rem);
> + return TRACE_TYPE_HANDLED;
> +}
> +
> +static enum print_line_t bkl_trace_print_line(struct trace_iterator *iter)
> +{
> + struct trace_entry *entry = iter->ent;
> +
> + if (entry->type == TRACE_BKL_ACQ)
> + return bkl_acq_print_line(iter);
> + if (entry->type == TRACE_BKL_REL)
> + return bkl_rel_print_line(iter);
> +
> + return TRACE_TYPE_UNHANDLED;
> +}
> +
> +static struct tracer bkl_tracer __read_mostly =
> +{
> + .name = "bkl",
> + .init = bkl_trace_init,
> + .reset = reset_bkl_trace,
> + .ctrl_update = bkl_trace_ctrl_update,
> + .print_line = bkl_trace_print_line,
> +};
> +
> +__init static int init_bkl_tracer(void)
> +{
> + return register_tracer(&bkl_tracer);
> +}
> +device_initcall(init_bkl_tracer);
> +
> +/*
> + * These functions don't need us to disable
> + * the preemption since it has been done by lock_kernel()
> + */
> +
> +void trace_bkl_acquire(struct bkl_trace_acquire *trace)
> +{
> + struct ring_buffer_event *event;
> + struct trace_bkl_acq *entry;
> + struct trace_array_cpu *data;
> + unsigned long irq_flags;
> + struct trace_array *tr = bkl_trace;
> +
> + if (!bkl_tracer_enabled)
> + return;
> +
> + data = tr->data[smp_processor_id()];
> +
> + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
> + &irq_flags);
> + if (!event)
> + return;
> + entry = ring_buffer_event_data(event);
> + tracing_generic_entry_update(&entry->ent, 0, 0);
> + entry->ent.type = TRACE_BKL_ACQ;
> + entry->bkl_acq = *trace;
> + ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
> +
> + trace_wake_up();
> +}
> +
> +void trace_bkl_release(struct bkl_trace_release *trace)
> +{
> + struct ring_buffer_event *event;
> + struct trace_bkl_rel *entry;
> + struct trace_array_cpu *data;
> + unsigned long irq_flags;
> + struct trace_array *tr = bkl_trace;
> +
> + if (!bkl_tracer_enabled)
> + return;
> +
> + data = tr->data[smp_processor_id()];
> +
> + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
> + &irq_flags);
> + if (!event)
> + return;
> + entry = ring_buffer_event_data(event);
> + tracing_generic_entry_update(&entry->ent, 0, 0);
> + entry->ent.type = TRACE_BKL_REL;
> + entry->bkl_rel = *trace;
> + ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
> +
> + trace_wake_up();
> +}
> diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
> index 01a3c22..9987834 100644
> --- a/lib/kernel_lock.c
> +++ b/lib/kernel_lock.c
> @@ -9,6 +9,7 @@
> #include <linux/module.h>
> #include <linux/kallsyms.h>
> #include <linux/semaphore.h>
> +#include <trace/bkl.h>
>
> /*
> * The 'big kernel lock'
> @@ -116,16 +117,33 @@ static inline void __unlock_kernel(void)
> void __lockfunc lock_kernel(void)
> {
> int depth = current->lock_depth+1;
> - if (likely(!depth))
> + if (likely(!depth)) {
> +#ifdef CONFIG_BKL_TRACER
> + struct bkl_trace_acquire trace;
> + preempt_disable();
> + trace.acquire_req_time = cpu_clock(raw_smp_processor_id());
> + preempt_enable();
> __lock_kernel();
> + trace.acquire_time = cpu_clock(raw_smp_processor_id());
> + trace_bkl_acquire(&trace);
> +#else
> + __lock_kernel();
> +#endif
> + }
> current->lock_depth = depth;
> }
>
> void __lockfunc unlock_kernel(void)
> {
> BUG_ON(current->lock_depth < 0);
> - if (likely(--current->lock_depth < 0))
> + if (likely(--current->lock_depth < 0)) {
> +#ifdef CONFIG_BKL_TRACER
> + struct bkl_trace_release trace;
> + trace.release_time = cpu_clock(raw_smp_processor_id());
> + trace_bkl_release(&trace);
> +#endif
> __unlock_kernel();
> + }
> }
>
> EXPORT_SYMBOL(lock_kernel);
>

Are these last 15 days patches submitted to tip put in a wait queue
because of the
merge window? I hope that the reason for no comment on them is not because
they stand in a total irrelevance...

Sorry I just wonder.....
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/