Re: [PATCH][RESEND] tracing/ftrace: Introduce the big kernel lock tracer

From: Steven Rostedt
Date: Wed Oct 22 2008 - 14:39:43 EST



On Wed, 22 Oct 2008, Frederic Weisbecker wrote:
>
> diff --git a/include/trace/bkl.h b/include/trace/bkl.h
> new file mode 100644
> index 0000000..bed2f1b
> --- /dev/null
> +++ b/include/trace/bkl.h
> @@ -0,0 +1,29 @@
> +#ifndef _TRACE_BKL_H
> +#define _TRACE_BKL_H
> +
> +
> +/* Entry which log the time when the task tried first to acquire the bkl
> + * and the time when it acquired it.
> + * That will let us know the latency to acquire the kernel lock.
> + */

We recommend the

/*
* Multi line
* comment style.
*/


> +struct bkl_trace_acquire {
> +        unsigned long long acquire_req_time;
> +        unsigned long long acquire_time;
> +};
> +
> +/* This will log the time when the task released the bkl.
> + * So we will know the whole duration of its kernel lock.
> + */
> +struct bkl_trace_release {
> +        unsigned long long release_time;
> +};
> +
> +#ifdef CONFIG_BKL_TRACER
> +extern void trace_bkl_acquire(struct bkl_trace_acquire *trace);
> +extern void trace_bkl_release(struct bkl_trace_release *trace);
> +#else
> +static void trace_bkl_acquire(struct bkl_trace_acquire *trace) { }
> +static void trace_bkl_release(struct bkl_trace_release *trace) { }

I think you want these as "static inline"
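
Plain "static" functions in a header get duplicated into every file that
includes it, and the compiler will warn about the unused copies;
"static inline" stubs compile away entirely. I.e.:

#ifdef CONFIG_BKL_TRACER
extern void trace_bkl_acquire(struct bkl_trace_acquire *trace);
extern void trace_bkl_release(struct bkl_trace_release *trace);
#else
static inline void trace_bkl_acquire(struct bkl_trace_acquire *trace) { }
static inline void trace_bkl_release(struct bkl_trace_release *trace) { }
#endif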

> +#endif
> +
> +#endif
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index 396aea1..fde7700 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -144,6 +144,17 @@ config STACK_TRACER
>           This tracer records the max stack of the kernel, and displays
>           it in debugfs/tracing/stack_trace
>
> +config BKL_TRACER
> + bool "Trace Big Kernel Lock latencies"
> + depends on HAVE_FTRACE

You don't need FTRACE for this.

> +        depends on DEBUG_KERNEL
> +        select FTRACE

Again, you don't need ftrace for this. You should just be able to trace
the latencies and report the places where they are big. If FTRACE is
configured in, then you can also get the function trace. We have patches
to replace FTRACE with FUNCTION_TRACER to eliminate this confusion.
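
With that, the entry could look something like this (just a sketch;
TRACING is the core buffering/reporting support, which is all this
tracer really needs):

config BKL_TRACER
        bool "Trace Big Kernel Lock latencies"
        depends on DEBUG_KERNEL
        select TRACING
        select STACKTRACE
        help
          This tracer records the latencies issued by the Big Kernel Lock.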


> +        select STACKTRACE
> +        help
> +          This tracer records the latencies issued by the Big Kernel Lock.
> +          It traces the time when a task request the bkl, the time when it
> +          acquires it and the time when it releases it.
> +
> config DYNAMIC_FTRACE
> bool "enable/disable ftrace tracepoints dynamically"
> depends on FTRACE
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index a85dfba..2ba8904 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -24,5 +24,6 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o
> obj-$(CONFIG_STACK_TRACER) += trace_stack.o
> obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
> obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
> +obj-$(CONFIG_BKL_TRACER) += trace_bkl.o
>
> libftrace-y := ftrace.o
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 707620c..b1df671 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -8,6 +8,7 @@
> #include <linux/ring_buffer.h>
> #include <linux/mmiotrace.h>
> #include <linux/ftrace.h>
> +#include <trace/bkl.h>
>
> enum trace_type {
>         __TRACE_FIRST_TYPE = 0,
> @@ -22,6 +23,8 @@ enum trace_type {
>         TRACE_MMIO_RW,
>         TRACE_MMIO_MAP,
>         TRACE_BOOT,
> +        TRACE_BKL_ACQ,
> +        TRACE_BKL_REL,
>
>         __TRACE_LAST_TYPE
> };
> @@ -119,6 +122,17 @@ struct trace_boot {
>         struct boot_trace initcall;
> };
>
> +/* Trace bkl entries */
> +struct trace_bkl_acq {
> +        struct trace_entry ent;
> +        struct bkl_trace_acquire bkl_acq;
> +};
> +
> +struct trace_bkl_rel {
> +        struct trace_entry ent;
> +        struct bkl_trace_release bkl_rel;
> +};
> +
> /*
> * trace_flag_type is an enumeration that holds different
> * states when a trace occurs. These are:
> @@ -219,6 +233,10 @@ extern void __ftrace_bad_type(void);
>                 IF_ASSIGN(var, ent, struct trace_mmiotrace_map,        \
>                           TRACE_MMIO_MAP);                             \
>                 IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT);    \
> +                IF_ASSIGN(var, ent, struct trace_bkl_acq,             \
> +                          TRACE_BKL_ACQ);                             \
> +                IF_ASSIGN(var, ent, struct trace_bkl_rel,             \
> +                          TRACE_BKL_REL);                             \
>                 __ftrace_bad_type();                                   \
>         } while (0)
>
> diff --git a/kernel/trace/trace_bkl.c b/kernel/trace/trace_bkl.c
> new file mode 100644
> index 0000000..169ee01
> --- /dev/null
> +++ b/kernel/trace/trace_bkl.c
> @@ -0,0 +1,170 @@
> +/*
> + * Ring buffer based Big Kernel Lock tracer
> + * for purpose of measuring the latencies issued
> + * by the BKL.
> + *
> + * Copyright (C) 2008 Frederic Weisbecker <fweisbec@xxxxxxxxx>
> + */
> +
> +#include <trace/bkl.h>
> +#include "trace.h"
> +#include <linux/ftrace.h>
> +
> +
> +static struct trace_array *bkl_trace;
> +static bool bkl_tracer_enabled;
> +
> +
> +static void bkl_trace_init(struct trace_array *tr)
> +{
> +        int cpu;
> +        bkl_trace = tr;
> +
> +        for_each_cpu_mask(cpu, cpu_possible_map)
> +                tracing_reset(tr, cpu);
> +
> +        if (tr->ctrl)
> +                bkl_tracer_enabled = 1;
> +}
> +
> +static void reset_bkl_trace(struct trace_array *tr)
> +{
> +        bkl_tracer_enabled = 0;
> +}
> +
> +static void bkl_trace_ctrl_update(struct trace_array *tr)
> +{
> +        if (tr->ctrl)
> +                bkl_tracer_enabled = 1;
> +        else
> +                bkl_tracer_enabled = 0;
> +}
> +
> +static enum print_line_t bkl_acq_print_line(struct trace_iterator *iter)
> +{
> +        int ret;
> +        struct trace_entry *entry = iter->ent;
> +        struct trace_bkl_acq *field = (struct trace_bkl_acq *) entry;

Could you use the trace_assign_type(field, entry) macro instead?
See trace.c for examples.
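
That keeps trace_assign_type()'s type checking instead of a blind cast.
Roughly:

        struct trace_entry *entry = iter->ent;
        struct trace_bkl_acq *field;

        trace_assign_type(field, entry);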


> +        struct bkl_trace_acquire *bta = &field->bkl_acq;
> +        struct trace_seq *s = &iter->seq;
> +        unsigned long sec, nsec_rem;
> +        unsigned long long wait_for_spin;
> +
> +        wait_for_spin = bta->acquire_time - bta->acquire_req_time;
> +        nsec_rem = do_div(bta->acquire_req_time, 1000000000);
> +        sec = (unsigned long) bta->acquire_req_time;
> +        ret = trace_seq_printf(s, "Kernel lock requested at %5lu.%09lu ",
> +                               sec, nsec_rem);
> +        if (!ret)
> +                return TRACE_TYPE_PARTIAL_LINE;
> +
> +        nsec_rem = do_div(bta->acquire_time, 1000000000);
> +        sec = (unsigned long) bta->acquire_time;
> +        ret = trace_seq_printf(s, "acquired at %5lu.%09lu "
> +                                  "(waited %llu nsecs)\n",
> +                               sec, nsec_rem,
> +                               wait_for_spin);
> +        if (!ret)
> +                return TRACE_TYPE_PARTIAL_LINE;
> +        return TRACE_TYPE_HANDLED;
> +}
> +
> +static enum print_line_t bkl_rel_print_line(struct trace_iterator *iter)
> +{
> +        int ret;
> +        struct trace_entry *entry = iter->ent;
> +        struct trace_bkl_rel *field = (struct trace_bkl_rel *) entry;

Same here.

> +        struct bkl_trace_release *btr = &field->bkl_rel;
> +        struct trace_seq *s = &iter->seq;
> +        unsigned long sec, nsec_rem;
> +
> +        nsec_rem = do_div(btr->release_time, 1000000000);
> +        sec = (unsigned long) btr->release_time;
> +        ret = trace_seq_printf(s, "Kernel lock released at %5lu.%09lu\n",
> +                               sec, nsec_rem);
> +        return TRACE_TYPE_HANDLED;
> +}
> +
> +static enum print_line_t bkl_trace_print_line(struct trace_iterator *iter)
> +{
> +        struct trace_entry *entry = iter->ent;
> +

You might want to make this into a switch instead.
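
I.e.:

        switch (entry->type) {
        case TRACE_BKL_ACQ:
                return bkl_acq_print_line(iter);
        case TRACE_BKL_REL:
                return bkl_rel_print_line(iter);
        default:
                return TRACE_TYPE_UNHANDLED;
        }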

> +        if (entry->type == TRACE_BKL_ACQ)
> +                return bkl_acq_print_line(iter);
> +        if (entry->type == TRACE_BKL_REL)
> +                return bkl_rel_print_line(iter);
> +
> +        return TRACE_TYPE_UNHANDLED;
> +}
> +
> +static struct tracer bkl_tracer __read_mostly =
> +{
> +        .name           = "bkl",
> +        .init           = bkl_trace_init,
> +        .reset          = reset_bkl_trace,
> +        .ctrl_update    = bkl_trace_ctrl_update,
> +        .print_line     = bkl_trace_print_line,
> +};
> +
> +__init static int init_bkl_tracer(void)
> +{
> +        return register_tracer(&bkl_tracer);
> +}
> +device_initcall(init_bkl_tracer);
> +
> +/*
> + * These functions don't need us to disable
> + * the preemption since it has been done by lock_kernel()
> + */
> +
> +void trace_bkl_acquire(struct bkl_trace_acquire *trace)
> +{
> +        struct ring_buffer_event *event;
> +        struct trace_bkl_acq *entry;
> +        struct trace_array_cpu *data;
> +        unsigned long irq_flags;
> +        struct trace_array *tr = bkl_trace;
> +
> +        if (!bkl_tracer_enabled)
> +                return;
> +
> +        data = tr->data[smp_processor_id()];

data is unused.

> +
> +        event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
> +                                         &irq_flags);
> +        if (!event)
> +                return;
> +        entry = ring_buffer_event_data(event);
> +        tracing_generic_entry_update(&entry->ent, 0, 0);
> +        entry->ent.type = TRACE_BKL_ACQ;
> +        entry->bkl_acq = *trace;
> +        ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
> +
> +        trace_wake_up();
> +}
> +
> +void trace_bkl_release(struct bkl_trace_release *trace)
> +{
> +        struct ring_buffer_event *event;
> +        struct trace_bkl_rel *entry;
> +        struct trace_array_cpu *data;
> +        unsigned long irq_flags;
> +        struct trace_array *tr = bkl_trace;
> +
> +        if (!bkl_tracer_enabled)
> +                return;
> +
> +        data = tr->data[smp_processor_id()];

data is unused here too.

> +
> +        event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
> +                                         &irq_flags);
> +        if (!event)
> +                return;
> +        entry = ring_buffer_event_data(event);
> +        tracing_generic_entry_update(&entry->ent, 0, 0);
> +        entry->ent.type = TRACE_BKL_REL;
> +        entry->bkl_rel = *trace;
> +        ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
> +
> +        trace_wake_up();
> +}
> diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
> index 01a3c22..9987834 100644
> --- a/lib/kernel_lock.c
> +++ b/lib/kernel_lock.c
> @@ -9,6 +9,7 @@
> #include <linux/module.h>
> #include <linux/kallsyms.h>
> #include <linux/semaphore.h>
> +#include <trace/bkl.h>
>
> /*
> * The 'big kernel lock'
> @@ -116,16 +117,33 @@ static inline void __unlock_kernel(void)
> void __lockfunc lock_kernel(void)
> {
>         int depth = current->lock_depth+1;
> -        if (likely(!depth))
> +        if (likely(!depth)) {
> +#ifdef CONFIG_BKL_TRACER
> +                struct bkl_trace_acquire trace;
> +                preempt_disable();
> +                trace.acquire_req_time = cpu_clock(raw_smp_processor_id());
> +                preempt_enable();
>                 __lock_kernel();
> +                trace.acquire_time = cpu_clock(raw_smp_processor_id());

Note, the cpu returned here may not be the same as the processor id above.

> +                trace_bkl_acquire(&trace);
> +#else
> +                __lock_kernel();
> +#endif

The above looks a bit ugly. Could you perhaps do something at the
beginning of the file like:

#ifdef CONFIG_BKL_TRACER
static void lock_kernel_trace(void)
{
        struct bkl_trace_acquire trace;

        preempt_disable();
        trace.acquire_req_time = cpu_clock(raw_smp_processor_id());
        preempt_enable();
        __lock_kernel();
        trace.acquire_time = cpu_clock(raw_smp_processor_id());
        trace_bkl_acquire(&trace);
}
#else
# define lock_kernel_trace() __lock_kernel()
#endif

And then just call lock_kernel_trace() within the code.
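
The call site then collapses to just (sketch):

void __lockfunc lock_kernel(void)
{
        int depth = current->lock_depth + 1;

        if (likely(!depth))
                lock_kernel_trace();
        current->lock_depth = depth;
}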

> +        }
>         current->lock_depth = depth;
> }
>
> void __lockfunc unlock_kernel(void)
> {
>         BUG_ON(current->lock_depth < 0);
> -        if (likely(--current->lock_depth < 0))
> +        if (likely(--current->lock_depth < 0)) {
> +#ifdef CONFIG_BKL_TRACER
> +                struct bkl_trace_release trace;
> +                trace.release_time = cpu_clock(raw_smp_processor_id());
> +                trace_bkl_release(&trace);
> +#endif

Same here. Have an unlock_kernel_trace();
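
Mirroring the lock side, roughly:

#ifdef CONFIG_BKL_TRACER
static void unlock_kernel_trace(void)
{
        struct bkl_trace_release trace;

        trace.release_time = cpu_clock(raw_smp_processor_id());
        trace_bkl_release(&trace);
        __unlock_kernel();
}
#else
# define unlock_kernel_trace() __unlock_kernel()
#endif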

>                 __unlock_kernel();
> +        }
> }
>
> EXPORT_SYMBOL(lock_kernel);
>
>

-- Steve
